1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "params.h"
63 #include "cselib.h"
64 #include "sched-int.h"
65 #include "opts.h"
66 #include "tree-pass.h"
67 #include "context.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "dbgcnt.h"
77 #include "case-cfn-macros.h"
78 #include "dojump.h"
79 #include "fold-const-call.h"
80 #include "tree-vrp.h"
81 #include "tree-ssanames.h"
82 #include "selftest.h"
83 #include "selftest-rtl.h"
84 #include "print-rtl.h"
85 #include "intl.h"
86 #include "ifcvt.h"
87 #include "symbol-summary.h"
88 #include "ipa-prop.h"
89 #include "ipa-fnsummary.h"
90 #include "wide-int-bitmask.h"
91 #include "tree-vector-builder.h"
92 #include "debug.h"
93 #include "dwarf2out.h"
94 #include "i386-options.h"
95 #include "i386-builtins.h"
96 #include "i386-expand.h"
97 #include "i386-features.h"
98
99 /* This file should be included last. */
100 #include "target-def.h"
101
102 static rtx legitimize_dllimport_symbol (rtx, bool);
103 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx);
106
107
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
111
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
118 : 4)
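/* For example, MODE_INDEX (SImode) is 2 and MODE_INDEX (DImode) is 3;
   any mode other than QI/HI/SI/DImode falls through to index 4, so the
   cost tables indexed by this macro have five entries (indices 0-4).  */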
119
120
121 /* Set by -mtune. */
122 const struct processor_costs *ix86_tune_cost = NULL;
123
124 /* Set by -mtune or -Os. */
125 const struct processor_costs *ix86_cost = NULL;
126
127 /* In case the average insn count for single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
129 epilogue code. */
130 #define FAST_PROLOGUE_INSN_COUNT 20
131
132 /* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively. */
133 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
134 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
135 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
136
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
139
140 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
141 {
142 /* ax, dx, cx, bx */
143 AREG, DREG, CREG, BREG,
144 /* si, di, bp, sp */
145 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
146 /* FP registers */
147 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
148 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
151 /* SSE registers */
152 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
153 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
154 /* MMX registers */
155 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
156 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157 /* REX registers */
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 /* SSE REX registers */
161 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
162 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
165 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 /* Mask registers. */
169 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
170 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
171 };
172
173 /* The "default" register map used in 32bit mode. */
174
175 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
176 {
177 /* general regs */
178 0, 2, 1, 3, 6, 7, 4, 5,
179 /* fp regs */
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
183 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184 /* SSE */
185 21, 22, 23, 24, 25, 26, 27, 28,
186 /* MMX */
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
190 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191 /* extended sse registers */
192 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 /* Mask registers */
201 93, 94, 95, 96, 97, 98, 99, 100
202 };
203
204 /* The "default" register map used in 64bit mode. */
205
206 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
207 {
208 /* general regs */
209 0, 1, 2, 3, 4, 5, 6, 7,
210 /* fp regs */
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
214 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215 /* SSE */
216 17, 18, 19, 20, 21, 22, 23, 24,
217 /* MMX */
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
227 /* Mask registers */
228 118, 119, 120, 121, 122, 123, 124, 125
229 };
230
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
275 numbers.
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
284 */
285 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
286 {
287 /* general regs */
288 0, 2, 1, 3, 6, 7, 5, 4,
289 /* fp regs */
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM, 9,
293 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
294 /* SSE registers */
295 21, 22, 23, 24, 25, 26, 27, 28,
296 /* MMX registers */
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
300 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301 /* extended sse registers */
302 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310 /* Mask registers */
311 93, 94, 95, 96, 97, 98, 99, 100
312 };
313
314 /* Define parameter passing and return registers. */
315
316 static int const x86_64_int_parameter_registers[6] =
317 {
318 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
319 };
320
321 static int const x86_64_ms_abi_int_parameter_registers[4] =
322 {
323 CX_REG, DX_REG, R8_REG, R9_REG
324 };
325
326 static int const x86_64_int_return_registers[4] =
327 {
328 AX_REG, DX_REG, DI_REG, SI_REG
329 };
330
331 /* Define the structure for the machine field in struct function. */
332
333 struct GTY(()) stack_local_entry {
334 unsigned short mode;
335 unsigned short n;
336 rtx rtl;
337 struct stack_local_entry *next;
338 };
339
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule;
342
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune;
345
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch;
348
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse;
351
352 /* Preferred alignment for stack boundary in bits. */
353 unsigned int ix86_preferred_stack_boundary;
354
355 /* Alignment for incoming stack boundary in bits specified at
356 command line. */
357 unsigned int ix86_user_incoming_stack_boundary;
358
359 /* Default alignment for incoming stack boundary in bits. */
360 unsigned int ix86_default_incoming_stack_boundary;
361
362 /* Alignment for incoming stack boundary in bits. */
363 unsigned int ix86_incoming_stack_boundary;
364
365 /* Calling abi specific va_list type nodes. */
366 tree sysv_va_list_type_node;
367 tree ms_va_list_type_node;
368
369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
370 char internal_label_prefix[16];
371 int internal_label_prefix_len;
372
373 /* Fence to use after loop using movnt. */
374 tree x86_mfence;
375
376 /* Register class used for passing a given 64-bit part of the argument.
377 These represent classes as documented by the psABI, with the exception
378 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
379 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
380 
381 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
382 whenever possible (when the upper half is just padding). */
383 enum x86_64_reg_class
384 {
385 X86_64_NO_CLASS,
386 X86_64_INTEGER_CLASS,
387 X86_64_INTEGERSI_CLASS,
388 X86_64_SSE_CLASS,
389 X86_64_SSESF_CLASS,
390 X86_64_SSEDF_CLASS,
391 X86_64_SSEUP_CLASS,
392 X86_64_X87_CLASS,
393 X86_64_X87UP_CLASS,
394 X86_64_COMPLEX_X87_CLASS,
395 X86_64_MEMORY_CLASS
396 };
397
398 #define MAX_CLASSES 8
399
400 /* Table of constants used by fldpi, fldln2, etc.... */
401 static REAL_VALUE_TYPE ext_80387_constants_table [5];
402 static bool ext_80387_constants_init;
403
404 \f
405 static rtx ix86_function_value (const_tree, const_tree, bool);
406 static bool ix86_function_value_regno_p (const unsigned int);
407 static unsigned int ix86_function_arg_boundary (machine_mode,
408 const_tree);
409 static rtx ix86_static_chain (const_tree, bool);
410 static int ix86_function_regparm (const_tree, const_tree);
411 static void ix86_compute_frame_layout (void);
412 static tree ix86_canonical_va_list_type (tree);
413 static unsigned int split_stack_prologue_scratch_regno (void);
414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
415
416 static bool ix86_can_inline_p (tree, tree);
417 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
418
419 \f
420 /* Whether -mtune= or -march= were specified */
421 int ix86_tune_defaulted;
422 int ix86_arch_specified;
423 \f
424 /* Return true if a red zone is in use. We can't use the red zone when
425 there are local indirect jumps, like "indirect_jump" or "tablejump",
426 which jump to another place in the function, since the "call" in the
427 indirect thunk pushes the return address onto the stack, destroying
428 the red zone.
429 
430 TODO: If we could reserve the first 2 WORDs of the red zone, one for
431 PUSH and another for CALL, we could allow local indirect jumps with
432 an indirect thunk. */
433
434 bool
435 ix86_using_red_zone (void)
436 {
437 return (TARGET_RED_ZONE
438 && !TARGET_64BIT_MS_ABI
439 && (!cfun->machine->has_local_indirect_jump
440 || cfun->machine->indirect_branch_type == indirect_branch_keep));
441 }
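/* (Under the 64-bit SysV ABI the red zone is the 128 bytes below %rsp that
   a leaf function may use without adjusting the stack pointer; a return
   address pushed by an indirect-thunk call would land in, and clobber,
   exactly that area.)  */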
442 \f
443 /* Return true if profiling code should be emitted before the
444 prologue, and false otherwise.
445 Note: For x86 the "hotfix" (hot-patching) case is sorried, i.e. rejected as unimplemented. */
446 static bool
447 ix86_profile_before_prologue (void)
448 {
449 return flag_fentry != 0;
450 }
451
452 /* Update register usage after having seen the compiler flags. */
453
454 static void
455 ix86_conditional_register_usage (void)
456 {
457 int i, c_mask;
458
459 /* If there are no caller-saved registers, preserve all registers
460 except fixed_regs and the registers used for the function return value,
461 since aggregate_value_p checks call_used_regs[regno] on the return
462 value. */
463 if (cfun && cfun->machine->no_caller_saved_registers)
464 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
465 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
466 call_used_regs[i] = 0;
467
468 /* For 32-bit targets, disable the REX registers. */
469 if (! TARGET_64BIT)
470 {
471 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
472 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
473 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
474 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
475 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
476 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
477 }
478
479 /* See the definition of CALL_USED_REGISTERS in i386.h. */
480 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
481
482 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
483
484 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
485 {
486 /* Set/reset conditionally defined registers from
487 CALL_USED_REGISTERS initializer. */
488 if (call_used_regs[i] > 1)
489 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
490
491 /* Calculate registers of CLOBBERED_REGS register set
492 as call used registers from GENERAL_REGS register set. */
493 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
494 && call_used_regs[i])
495 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
496 }
497
498 /* If MMX is disabled, disable the registers. */
499 if (! TARGET_MMX)
500 AND_COMPL_HARD_REG_SET (accessible_reg_set,
501 reg_class_contents[(int) MMX_REGS]);
502
503 /* If SSE is disabled, disable the registers. */
504 if (! TARGET_SSE)
505 AND_COMPL_HARD_REG_SET (accessible_reg_set,
506 reg_class_contents[(int) ALL_SSE_REGS]);
507
508 /* If the FPU is disabled, disable the registers. */
509 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
510 AND_COMPL_HARD_REG_SET (accessible_reg_set,
511 reg_class_contents[(int) FLOAT_REGS]);
512
513 /* If AVX512F is disabled, disable the registers. */
514 if (! TARGET_AVX512F)
515 {
516 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
517 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
518
519 AND_COMPL_HARD_REG_SET (accessible_reg_set,
520 reg_class_contents[(int) ALL_MASK_REGS]);
521 }
522 }
523
524 /* Canonicalize a comparison from one we don't have to one we do have. */
525
526 static void
527 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
528 bool op0_preserve_value)
529 {
530 /* The order of operands in x87 ficom compare is forced by combine in
531 simplify_comparison () function. Float operator is treated as RTX_OBJ
532 with a precedence over other operators and is always put in the first
533 place. Swap condition and operands to match ficom instruction. */
534 if (!op0_preserve_value
535 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
536 {
537 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
538
539 /* We are called only for compares that are split to SAHF instruction.
540 Ensure that we have setcc/jcc insn for the swapped condition. */
541 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
542 {
543 std::swap (*op0, *op1);
544 *code = (int) scode;
545 }
546 }
547 }
548 \f
549 \f
550 /* Hook to determine if one function can safely inline another. */
551
552 static bool
553 ix86_can_inline_p (tree caller, tree callee)
554 {
555 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
556 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
557
558 /* Changes to these flags can be tolerated for always_inline functions. Let's hope
559 the user knows what they are doing. */
560 const unsigned HOST_WIDE_INT always_inline_safe_mask
561 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
562 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
563 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
564 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
565 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
566 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
567 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
568
569
570 if (!callee_tree)
571 callee_tree = target_option_default_node;
572 if (!caller_tree)
573 caller_tree = target_option_default_node;
574 if (callee_tree == caller_tree)
575 return true;
576
577 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
578 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
579 bool ret = false;
580 bool always_inline
581 = (DECL_DISREGARD_INLINE_LIMITS (callee)
582 && lookup_attribute ("always_inline",
583 DECL_ATTRIBUTES (callee)));
584
585 cgraph_node *callee_node = cgraph_node::get (callee);
586 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
587 function can inline an SSE2 function, but an SSE2 function can't inline
588 an SSE4 function. */
589 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
590 != callee_opts->x_ix86_isa_flags)
591 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
592 != callee_opts->x_ix86_isa_flags2))
593 ret = false;
594
595 /* See if we have the same non-isa options. */
596 else if ((!always_inline
597 && caller_opts->x_target_flags != callee_opts->x_target_flags)
598 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
599 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
600 ret = false;
601
602 /* See if arch, tune, etc. are the same. */
603 else if (caller_opts->arch != callee_opts->arch)
604 ret = false;
605
606 else if (!always_inline && caller_opts->tune != callee_opts->tune)
607 ret = false;
608
609 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
610 /* If the callee doesn't use FP expressions, differences in
611 ix86_fpmath can be ignored. We are called from FEs
612 for multi-versioning call optimization, so beware of
613 ipa_fn_summaries not being available. */
614 && (! ipa_fn_summaries
615 || ipa_fn_summaries->get (callee_node) == NULL
616 || ipa_fn_summaries->get (callee_node)->fp_expressions))
617 ret = false;
618
619 else if (!always_inline
620 && caller_opts->branch_cost != callee_opts->branch_cost)
621 ret = false;
622
623 else
624 ret = true;
625
626 return ret;
627 }
628 \f
629 /* Return true if this goes in large data/bss. */
630
631 static bool
632 ix86_in_large_data_p (tree exp)
633 {
634 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
635 return false;
636
637 if (exp == NULL_TREE)
638 return false;
639
640 /* Functions are never large data. */
641 if (TREE_CODE (exp) == FUNCTION_DECL)
642 return false;
643
644 /* Automatic variables are never large data. */
645 if (VAR_P (exp) && !is_global_var (exp))
646 return false;
647
648 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
649 {
650 const char *section = DECL_SECTION_NAME (exp);
651 if (strcmp (section, ".ldata") == 0
652 || strcmp (section, ".lbss") == 0)
653 return true;
654 return false;
655 }
656 else
657 {
658 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
659
660 /* If this is an incomplete type with size 0, then we can't put it
661 in data because it might be too big when completed. Also,
662 int_size_in_bytes returns -1 if the size can vary or is larger than
663 an integer, in which case it is also safer to assume that it goes in
664 large data. */
665 if (size <= 0 || size > ix86_section_threshold)
666 return true;
667 }
668
669 return false;
670 }
671
672 /* i386-specific section flag to mark large sections. */
673 #define SECTION_LARGE SECTION_MACH_DEP
674
675 /* Switch to the appropriate section for output of DECL.
676 DECL is either a `VAR_DECL' node or a constant of some sort.
677 RELOC indicates whether forming the initial value of DECL requires
678 link-time relocations. */
679
680 ATTRIBUTE_UNUSED static section *
681 x86_64_elf_select_section (tree decl, int reloc,
682 unsigned HOST_WIDE_INT align)
683 {
684 if (ix86_in_large_data_p (decl))
685 {
686 const char *sname = NULL;
687 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
688 switch (categorize_decl_for_section (decl, reloc))
689 {
690 case SECCAT_DATA:
691 sname = ".ldata";
692 break;
693 case SECCAT_DATA_REL:
694 sname = ".ldata.rel";
695 break;
696 case SECCAT_DATA_REL_LOCAL:
697 sname = ".ldata.rel.local";
698 break;
699 case SECCAT_DATA_REL_RO:
700 sname = ".ldata.rel.ro";
701 break;
702 case SECCAT_DATA_REL_RO_LOCAL:
703 sname = ".ldata.rel.ro.local";
704 break;
705 case SECCAT_BSS:
706 sname = ".lbss";
707 flags |= SECTION_BSS;
708 break;
709 case SECCAT_RODATA:
710 case SECCAT_RODATA_MERGE_STR:
711 case SECCAT_RODATA_MERGE_STR_INIT:
712 case SECCAT_RODATA_MERGE_CONST:
713 sname = ".lrodata";
714 flags &= ~SECTION_WRITE;
715 break;
716 case SECCAT_SRODATA:
717 case SECCAT_SDATA:
718 case SECCAT_SBSS:
719 gcc_unreachable ();
720 case SECCAT_TEXT:
721 case SECCAT_TDATA:
722 case SECCAT_TBSS:
723 /* We don't split these for the medium model. Place them into
724 the default sections and hope for the best. */
725 break;
726 }
727 if (sname)
728 {
729 /* We might get called with string constants, but get_named_section
730 doesn't like them as they are not DECLs. Also, we need to set
731 flags in that case. */
732 if (!DECL_P (decl))
733 return get_section (sname, flags, NULL);
734 return get_named_section (decl, sname, reloc);
735 }
736 }
737 return default_elf_select_section (decl, reloc, align);
738 }
739
740 /* Select a set of attributes for section NAME based on the properties
741 of DECL and whether or not RELOC indicates that DECL's initializer
742 might contain runtime relocations. */
743
744 static unsigned int ATTRIBUTE_UNUSED
745 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
746 {
747 unsigned int flags = default_section_type_flags (decl, name, reloc);
748
749 if (ix86_in_large_data_p (decl))
750 flags |= SECTION_LARGE;
751
752 if (decl == NULL_TREE
753 && (strcmp (name, ".ldata.rel.ro") == 0
754 || strcmp (name, ".ldata.rel.ro.local") == 0))
755 flags |= SECTION_RELRO;
756
757 if (strcmp (name, ".lbss") == 0
758 || strncmp (name, ".lbss.", 5) == 0
759 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
760 flags |= SECTION_BSS;
761
762 return flags;
763 }
764
765 /* Build up a unique section name, expressed as a
766 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
767 RELOC indicates whether the initial value of EXP requires
768 link-time relocations. */
769
770 static void ATTRIBUTE_UNUSED
771 x86_64_elf_unique_section (tree decl, int reloc)
772 {
773 if (ix86_in_large_data_p (decl))
774 {
775 const char *prefix = NULL;
776 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
777 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
778
779 switch (categorize_decl_for_section (decl, reloc))
780 {
781 case SECCAT_DATA:
782 case SECCAT_DATA_REL:
783 case SECCAT_DATA_REL_LOCAL:
784 case SECCAT_DATA_REL_RO:
785 case SECCAT_DATA_REL_RO_LOCAL:
786 prefix = one_only ? ".ld" : ".ldata";
787 break;
788 case SECCAT_BSS:
789 prefix = one_only ? ".lb" : ".lbss";
790 break;
791 case SECCAT_RODATA:
792 case SECCAT_RODATA_MERGE_STR:
793 case SECCAT_RODATA_MERGE_STR_INIT:
794 case SECCAT_RODATA_MERGE_CONST:
795 prefix = one_only ? ".lr" : ".lrodata";
796 break;
797 case SECCAT_SRODATA:
798 case SECCAT_SDATA:
799 case SECCAT_SBSS:
800 gcc_unreachable ();
801 case SECCAT_TEXT:
802 case SECCAT_TDATA:
803 case SECCAT_TBSS:
804 /* We don't split these for the medium model. Place them into
805 the default sections and hope for the best. */
806 break;
807 }
808 if (prefix)
809 {
810 const char *name, *linkonce;
811 char *string;
812
813 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
814 name = targetm.strip_name_encoding (name);
815
816 /* If we're using one_only, then there needs to be a .gnu.linkonce
817 prefix to the section name. */
818 linkonce = one_only ? ".gnu.linkonce" : "";
819
820 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
821
822 set_decl_section_name (decl, string);
823 return;
824 }
825 }
826 default_unique_section (decl, reloc);
827 }
828
829 #ifdef COMMON_ASM_OP
830
831 #ifndef LARGECOMM_SECTION_ASM_OP
832 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
833 #endif
834
835 /* This says how to output assembler code to declare an
836 uninitialized external linkage data object.
837
838 For medium model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP directive
839 for large objects. */
840 void
841 x86_elf_aligned_decl_common (FILE *file, tree decl,
842 const char *name, unsigned HOST_WIDE_INT size,
843 int align)
844 {
845 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
846 && size > (unsigned int)ix86_section_threshold)
847 {
848 switch_to_section (get_named_section (decl, ".lbss", 0));
849 fputs (LARGECOMM_SECTION_ASM_OP, file);
850 }
851 else
852 fputs (COMMON_ASM_OP, file);
853 assemble_name (file, name);
854 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
855 size, align / BITS_PER_UNIT);
856 }
857 #endif
858
859 /* Utility function for targets to use in implementing
860 ASM_OUTPUT_ALIGNED_BSS. */
861
862 void
863 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
864 unsigned HOST_WIDE_INT size, int align)
865 {
866 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
867 && size > (unsigned int)ix86_section_threshold)
868 switch_to_section (get_named_section (decl, ".lbss", 0));
869 else
870 switch_to_section (bss_section);
871 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
872 #ifdef ASM_DECLARE_OBJECT_NAME
873 last_assemble_variable_decl = decl;
874 ASM_DECLARE_OBJECT_NAME (file, name, decl);
875 #else
876 /* The standard thing is to just output a label for the object. */
877 ASM_OUTPUT_LABEL (file, name);
878 #endif /* ASM_DECLARE_OBJECT_NAME */
879 ASM_OUTPUT_SKIP (file, size ? size : 1);
880 }
881 \f
882 /* Decide whether we must probe the stack before any space allocation
883 on this target. It's essentially TARGET_STACK_PROBE except when
884 -fstack-check causes the stack to be already probed differently. */
885
886 bool
887 ix86_target_stack_probe (void)
888 {
889 /* Do not probe the stack twice if static stack checking is enabled. */
890 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
891 return false;
892
893 return TARGET_STACK_PROBE;
894 }
895 \f
896 /* Decide whether we can make a sibling call to a function. DECL is the
897 declaration of the function being targeted by the call and EXP is the
898 CALL_EXPR representing the call. */
899
900 static bool
901 ix86_function_ok_for_sibcall (tree decl, tree exp)
902 {
903 tree type, decl_or_type;
904 rtx a, b;
905 bool bind_global = decl && !targetm.binds_local_p (decl);
906
907 if (ix86_function_naked (current_function_decl))
908 return false;
909
910 /* A sibling call isn't OK if there are no caller-saved registers,
911 since all registers must be preserved before return. */
912 if (cfun->machine->no_caller_saved_registers)
913 return false;
914
915 /* If we are generating position-independent code, we cannot sibcall
916 optimize direct calls to global functions, as the PLT requires
917 %ebx be live. (Darwin does not have a PLT.) */
918 if (!TARGET_MACHO
919 && !TARGET_64BIT
920 && flag_pic
921 && flag_plt
922 && bind_global)
923 return false;
924
925 /* If we need to align the outgoing stack, then sibcalling would
926 unalign the stack, which may break the called function. */
927 if (ix86_minimum_incoming_stack_boundary (true)
928 < PREFERRED_STACK_BOUNDARY)
929 return false;
930
931 if (decl)
932 {
933 decl_or_type = decl;
934 type = TREE_TYPE (decl);
935 }
936 else
937 {
938 /* We're looking at the CALL_EXPR, we need the type of the function. */
939 type = CALL_EXPR_FN (exp); /* pointer expression */
940 type = TREE_TYPE (type); /* pointer type */
941 type = TREE_TYPE (type); /* function type */
942 decl_or_type = type;
943 }
944
945 /* Check that the return value locations are the same. For example,
946 if we are returning floats on the 80387 register stack, we cannot
947 make a sibcall from a function that doesn't return a float to a
948 function that does or, conversely, from a function that does return
949 a float to a function that doesn't; the necessary stack adjustment
950 would not be executed. This is also the place we notice
951 differences in the return value ABI. Note that it is ok for one
952 of the functions to have void return type as long as the return
953 value of the other is passed in a register. */
954 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
955 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
956 cfun->decl, false);
957 if (STACK_REG_P (a) || STACK_REG_P (b))
958 {
959 if (!rtx_equal_p (a, b))
960 return false;
961 }
962 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
963 ;
964 else if (!rtx_equal_p (a, b))
965 return false;
966
967 if (TARGET_64BIT)
968 {
969 /* The SYSV ABI has more call-clobbered registers;
970 disallow sibcalls from MS to SYSV. */
971 if (cfun->machine->call_abi == MS_ABI
972 && ix86_function_type_abi (type) == SYSV_ABI)
973 return false;
974 }
975 else
976 {
977 /* If this call is indirect, we'll need to be able to use a
978 call-clobbered register for the address of the target function.
979 Make sure that all such registers are not used for passing
980 parameters. Note that DLLIMPORT functions and calls to global
981 functions via the GOT slot are indirect.
982 if (!decl
983 || (bind_global && flag_pic && !flag_plt)
984 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
985 || flag_force_indirect_call)
986 {
987 /* Check if regparm >= 3 since arg_reg_available is set to
988 false if regparm == 0. If regparm is 1 or 2, there is
989 always a call-clobbered register available.
990
991 ??? The symbol indirect call doesn't need a call-clobbered
992 register. But we don't know if this is a symbol indirect
993 call or not here. */
994 if (ix86_function_regparm (type, decl) >= 3
995 && !cfun->machine->arg_reg_available)
996 return false;
997 }
998 }
999
1000 /* Otherwise okay. That also includes certain types of indirect calls. */
1001 return true;
1002 }
1003
1004 /* This function determines from TYPE the calling-convention. */
1005
1006 unsigned int
1007 ix86_get_callcvt (const_tree type)
1008 {
1009 unsigned int ret = 0;
1010 bool is_stdarg;
1011 tree attrs;
1012
1013 if (TARGET_64BIT)
1014 return IX86_CALLCVT_CDECL;
1015
1016 attrs = TYPE_ATTRIBUTES (type);
1017 if (attrs != NULL_TREE)
1018 {
1019 if (lookup_attribute ("cdecl", attrs))
1020 ret |= IX86_CALLCVT_CDECL;
1021 else if (lookup_attribute ("stdcall", attrs))
1022 ret |= IX86_CALLCVT_STDCALL;
1023 else if (lookup_attribute ("fastcall", attrs))
1024 ret |= IX86_CALLCVT_FASTCALL;
1025 else if (lookup_attribute ("thiscall", attrs))
1026 ret |= IX86_CALLCVT_THISCALL;
1027
1028 /* Regparm isn't allowed for thiscall and fastcall. */
1029 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1030 {
1031 if (lookup_attribute ("regparm", attrs))
1032 ret |= IX86_CALLCVT_REGPARM;
1033 if (lookup_attribute ("sseregparm", attrs))
1034 ret |= IX86_CALLCVT_SSEREGPARM;
1035 }
1036
1037 if (IX86_BASE_CALLCVT(ret) != 0)
1038 return ret;
1039 }
1040
1041 is_stdarg = stdarg_p (type);
1042 if (TARGET_RTD && !is_stdarg)
1043 return IX86_CALLCVT_STDCALL | ret;
1044
1045 if (ret != 0
1046 || is_stdarg
1047 || TREE_CODE (type) != METHOD_TYPE
1048 || ix86_function_type_abi (type) != MS_ABI)
1049 return IX86_CALLCVT_CDECL | ret;
1050
1051 return IX86_CALLCVT_THISCALL;
1052 }
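/* As a rough illustration: on a 32-bit target, a prototyped function type
   carrying the "stdcall" attribute yields IX86_CALLCVT_STDCALL, a plain
   prototyped function with no attributes (and without -mrtd) yields
   IX86_CALLCVT_CDECL, and an MS-ABI METHOD_TYPE with no attributes falls
   through to IX86_CALLCVT_THISCALL.  */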
1053
1054 /* Return 0 if the attributes for two types are incompatible, 1 if they
1055 are compatible, and 2 if they are nearly compatible (which causes a
1056 warning to be generated). */
1057
1058 static int
1059 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1060 {
1061 unsigned int ccvt1, ccvt2;
1062
1063 if (TREE_CODE (type1) != FUNCTION_TYPE
1064 && TREE_CODE (type1) != METHOD_TYPE)
1065 return 1;
1066
1067 ccvt1 = ix86_get_callcvt (type1);
1068 ccvt2 = ix86_get_callcvt (type2);
1069 if (ccvt1 != ccvt2)
1070 return 0;
1071 if (ix86_function_regparm (type1, NULL)
1072 != ix86_function_regparm (type2, NULL))
1073 return 0;
1074
1075 return 1;
1076 }
1077 \f
1078 /* Return the regparm value for a function with the indicated TYPE and DECL.
1079 DECL may be NULL when calling function indirectly
1080 or considering a libcall. */
1081
1082 static int
1083 ix86_function_regparm (const_tree type, const_tree decl)
1084 {
1085 tree attr;
1086 int regparm;
1087 unsigned int ccvt;
1088
1089 if (TARGET_64BIT)
1090 return (ix86_function_type_abi (type) == SYSV_ABI
1091 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1092 ccvt = ix86_get_callcvt (type);
1093 regparm = ix86_regparm;
1094
1095 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1096 {
1097 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1098 if (attr)
1099 {
1100 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1101 return regparm;
1102 }
1103 }
1104 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1105 return 2;
1106 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1107 return 1;
1108
1109 /* Use register calling convention for local functions when possible. */
1110 if (decl
1111 && TREE_CODE (decl) == FUNCTION_DECL)
1112 {
1113 cgraph_node *target = cgraph_node::get (decl);
1114 if (target)
1115 target = target->function_symbol ();
1116
1117 /* Caller and callee must agree on the calling convention, so
1118 checking just `optimize' here would mean that with
1119 __attribute__((optimize (...))) the caller could use the regparm convention
1120 and the callee not, or vice versa. Instead look at whether the callee
1121 is optimized or not. */
1122 if (target && opt_for_fn (target->decl, optimize)
1123 && !(profile_flag && !flag_fentry))
1124 {
1125 cgraph_local_info *i = &target->local;
1126 if (i && i->local && i->can_change_signature)
1127 {
1128 int local_regparm, globals = 0, regno;
1129
1130 /* Make sure no regparm register is taken by a
1131 fixed register variable. */
1132 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1133 local_regparm++)
1134 if (fixed_regs[local_regparm])
1135 break;
1136
1137 /* We don't want to use regparm(3) for nested functions as
1138 these use a static chain pointer in the third argument. */
1139 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1140 local_regparm = 2;
1141
1142 /* Save a register for the split stack. */
1143 if (flag_split_stack)
1144 {
1145 if (local_regparm == 3)
1146 local_regparm = 2;
1147 else if (local_regparm == 2
1148 && DECL_STATIC_CHAIN (target->decl))
1149 local_regparm = 1;
1150 }
1151
1152 /* Each fixed register usage increases register pressure,
1153 so fewer registers should be used for argument passing.
1154 This functionality can be overridden by an explicit
1155 regparm value. */
1156 for (regno = AX_REG; regno <= DI_REG; regno++)
1157 if (fixed_regs[regno])
1158 globals++;
1159
1160 local_regparm
1161 = globals < local_regparm ? local_regparm - globals : 0;
1162
1163 if (local_regparm > regparm)
1164 regparm = local_regparm;
1165 }
1166 }
1167 }
1168
1169 return regparm;
1170 }
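/* For example, __attribute__((regparm (3))) gives 3, "fastcall" gives 2
   (arguments in %ecx and %edx) and "thiscall" gives 1 (%ecx holds `this');
   the local-function path above may additionally raise the value for
   static functions whose signature GCC is free to change.  */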
1171
1172 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1173 DFmode (2) arguments in SSE registers for a function with the
1174 indicated TYPE and DECL. DECL may be NULL when calling a function
1175 indirectly or considering a libcall. Return -1 if any FP parameter
1176 should be rejected by error; this is used in situations where we imply the
1177 SSE calling convention but the function is called from another function
1178 with SSE disabled. Otherwise return 0. */
1179
1180 static int
1181 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1182 {
1183 gcc_assert (!TARGET_64BIT);
1184
1185 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1186 by the sseregparm attribute. */
1187 if (TARGET_SSEREGPARM
1188 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1189 {
1190 if (!TARGET_SSE)
1191 {
1192 if (warn)
1193 {
1194 if (decl)
1195 error ("calling %qD with attribute sseregparm without "
1196 "SSE/SSE2 enabled", decl);
1197 else
1198 error ("calling %qT with attribute sseregparm without "
1199 "SSE/SSE2 enabled", type);
1200 }
1201 return 0;
1202 }
1203
1204 return 2;
1205 }
1206
1207 if (!decl)
1208 return 0;
1209
1210 cgraph_node *target = cgraph_node::get (decl);
1211 if (target)
1212 target = target->function_symbol ();
1213
1214 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1215 (and DFmode for SSE2) arguments in SSE registers. */
1216 if (target
1217 /* TARGET_SSE_MATH */
1218 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1219 && opt_for_fn (target->decl, optimize)
1220 && !(profile_flag && !flag_fentry))
1221 {
1222 cgraph_local_info *i = &target->local;
1223 if (i && i->local && i->can_change_signature)
1224 {
1225 /* Refuse to produce wrong code when a local function with SSE enabled
1226 is called from an SSE-disabled function.
1227 FIXME: We need a way to detect these cases across ltrans partitions
1228 and avoid using SSE calling conventions on local functions called
1229 from functions with SSE disabled. For now at least delay the
1230 warning until we know we are going to produce wrong code.
1231 See PR66047. */
1232 if (!TARGET_SSE && warn)
1233 return -1;
1234 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1235 ->x_ix86_isa_flags) ? 2 : 1;
1236 }
1237 }
1238
1239 return 0;
1240 }
1241
1242 /* Return true if EAX is live at the start of the function. Used by
1243 ix86_expand_prologue to determine if we need special help before
1244 calling allocate_stack_worker. */
1245
1246 static bool
1247 ix86_eax_live_at_start_p (void)
1248 {
1249 /* Cheat. Don't bother working forward from ix86_function_regparm
1250 to the function type to whether an actual argument is located in
1251 eax. Instead just look at cfg info, which is still close enough
1252 to correct at this point. This gives false positives for broken
1253 functions that might use uninitialized data that happens to be
1254 allocated in eax, but who cares? */
1255 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1256 }
1257
1258 static bool
1259 ix86_keep_aggregate_return_pointer (tree fntype)
1260 {
1261 tree attr;
1262
1263 if (!TARGET_64BIT)
1264 {
1265 attr = lookup_attribute ("callee_pop_aggregate_return",
1266 TYPE_ATTRIBUTES (fntype));
1267 if (attr)
1268 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1269
1270 /* For 32-bit MS-ABI the default is to keep aggregate
1271 return pointer. */
1272 if (ix86_function_type_abi (fntype) == MS_ABI)
1273 return true;
1274 }
1275 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1276 }
1277
1278 /* Value is the number of bytes of arguments automatically
1279 popped when returning from a subroutine call.
1280 FUNDECL is the declaration node of the function (as a tree),
1281 FUNTYPE is the data type of the function (as a tree),
1282 or for a library call it is an identifier node for the subroutine name.
1283 SIZE is the number of bytes of arguments passed on the stack.
1284
1285 On the 80386, the RTD insn may be used to pop them if the number
1286 of args is fixed, but if the number is variable then the caller
1287 must pop them all. RTD can't be used for library calls now
1288 because the library is compiled with the Unix compiler.
1289 Use of RTD is a selectable option, since it is incompatible with
1290 standard Unix calling sequences. If the option is not selected,
1291 the caller must always pop the args.
1292
1293 The attribute stdcall is equivalent to RTD on a per module basis. */
1294
1295 static poly_int64
1296 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1297 {
1298 unsigned int ccvt;
1299
1300 /* None of the 64-bit ABIs pop arguments. */
1301 if (TARGET_64BIT)
1302 return 0;
1303
1304 ccvt = ix86_get_callcvt (funtype);
1305
1306 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1307 | IX86_CALLCVT_THISCALL)) != 0
1308 && ! stdarg_p (funtype))
1309 return size;
1310
1311 /* Lose any fake structure return argument if it is passed on the stack. */
1312 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1313 && !ix86_keep_aggregate_return_pointer (funtype))
1314 {
1315 int nregs = ix86_function_regparm (funtype, fundecl);
1316 if (nregs == 0)
1317 return GET_MODE_SIZE (Pmode);
1318 }
1319
1320 return 0;
1321 }
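/* For example, a 32-bit "stdcall" function with a fixed argument list and
   12 bytes of stack arguments returns 12 here (the callee pops them with
   "ret $12"), whereas a cdecl function returns 0 and the caller pops.  */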
1322
1323 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1324
1325 static bool
1326 ix86_legitimate_combined_insn (rtx_insn *insn)
1327 {
1328 int i;
1329
1330 /* Check operand constraints in case hard registers were propagated
1331 into insn pattern. This check prevents combine pass from
1332 generating insn patterns with invalid hard register operands.
1333 These invalid insns can eventually confuse reload to error out
1334 with a spill failure. See also PRs 46829 and 46843. */
1335
1336 gcc_assert (INSN_CODE (insn) >= 0);
1337
1338 extract_insn (insn);
1339 preprocess_constraints (insn);
1340
1341 int n_operands = recog_data.n_operands;
1342 int n_alternatives = recog_data.n_alternatives;
1343 for (i = 0; i < n_operands; i++)
1344 {
1345 rtx op = recog_data.operand[i];
1346 machine_mode mode = GET_MODE (op);
1347 const operand_alternative *op_alt;
1348 int offset = 0;
1349 bool win;
1350 int j;
1351
1352 /* A unary operator may be accepted by the predicate, but it
1353 is irrelevant for matching constraints. */
1354 if (UNARY_P (op))
1355 op = XEXP (op, 0);
1356
1357 if (SUBREG_P (op))
1358 {
1359 if (REG_P (SUBREG_REG (op))
1360 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1361 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1362 GET_MODE (SUBREG_REG (op)),
1363 SUBREG_BYTE (op),
1364 GET_MODE (op));
1365 op = SUBREG_REG (op);
1366 }
1367
1368 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1369 continue;
1370
1371 op_alt = recog_op_alt;
1372
1373 /* Operand has no constraints, anything is OK. */
1374 win = !n_alternatives;
1375
1376 alternative_mask preferred = get_preferred_alternatives (insn);
1377 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1378 {
1379 if (!TEST_BIT (preferred, j))
1380 continue;
1381 if (op_alt[i].anything_ok
1382 || (op_alt[i].matches != -1
1383 && operands_match_p
1384 (recog_data.operand[i],
1385 recog_data.operand[op_alt[i].matches]))
1386 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1387 {
1388 win = true;
1389 break;
1390 }
1391 }
1392
1393 if (!win)
1394 return false;
1395 }
1396
1397 return true;
1398 }
1399 \f
1400 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1401
1402 static unsigned HOST_WIDE_INT
1403 ix86_asan_shadow_offset (void)
1404 {
1405 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
1406 : HOST_WIDE_INT_C (0x7fff8000))
1407 : (HOST_WIDE_INT_1 << 29);
1408 }
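/* ASan maps an address A to its shadow byte at (A >> 3) + offset, so the
   constants above choose where the shadow region lives for LP64, Mach-O
   LP64 and 32-bit targets respectively.  */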
1409 \f
1410 /* Argument support functions. */
1411
1412 /* Return true when register may be used to pass function parameters. */
1413 bool
1414 ix86_function_arg_regno_p (int regno)
1415 {
1416 int i;
1417 enum calling_abi call_abi;
1418 const int *parm_regs;
1419
1420 if (!TARGET_64BIT)
1421 {
1422 if (TARGET_MACHO)
1423 return (regno < REGPARM_MAX
1424 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1425 else
1426 return (regno < REGPARM_MAX
1427 || (TARGET_MMX && MMX_REGNO_P (regno)
1428 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
1429 || (TARGET_SSE && SSE_REGNO_P (regno)
1430 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
1431 }
1432
1433 if (TARGET_SSE && SSE_REGNO_P (regno)
1434 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
1435 return true;
1436
1437 /* TODO: The function should depend on current function ABI but
1438 builtins.c would need updating then. Therefore we use the
1439 default ABI. */
1440 call_abi = ix86_cfun_abi ();
1441
1442 /* RAX is used as hidden argument to va_arg functions. */
1443 if (call_abi == SYSV_ABI && regno == AX_REG)
1444 return true;
1445
1446 if (call_abi == MS_ABI)
1447 parm_regs = x86_64_ms_abi_int_parameter_registers;
1448 else
1449 parm_regs = x86_64_int_parameter_registers;
1450
1451 for (i = 0; i < (call_abi == MS_ABI
1452 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1453 if (regno == parm_regs[i])
1454 return true;
1455 return false;
1456 }
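/* For instance, under the 64-bit SysV ABI this returns true for %rdi,
   %rsi, %rdx, %rcx, %r8 and %r9 (integer arguments), %xmm0-%xmm7
   (floating-point and vector arguments), and %rax, which carries the
   number of vector registers used when calling a varargs function.  */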
1457
1458 /* Return true if we do not know how to pass ARG solely in registers. */
1459
1460 static bool
1461 ix86_must_pass_in_stack (const function_arg_info &arg)
1462 {
1463 if (must_pass_in_stack_var_size_or_pad (arg))
1464 return true;
1465
1466 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1467 The layout_type routine is crafty and tries to trick us into passing
1468 currently unsupported vector types on the stack by using TImode. */
1469 return (!TARGET_64BIT && arg.mode == TImode
1470 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1471 }
1472
1473 /* Return the size, in bytes, of the area reserved for arguments passed
1474 in registers for the function represented by FNDECL, depending on the
1475 ABI used. */
1476 int
1477 ix86_reg_parm_stack_space (const_tree fndecl)
1478 {
1479 enum calling_abi call_abi = SYSV_ABI;
1480 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1481 call_abi = ix86_function_abi (fndecl);
1482 else
1483 call_abi = ix86_function_type_abi (fndecl);
1484 if (TARGET_64BIT && call_abi == MS_ABI)
1485 return 32;
1486 return 0;
1487 }
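/* The 32 bytes are the MS x64 "home area": the caller always reserves
   four stack slots for the register arguments (%rcx, %rdx, %r8, %r9)
   so the callee has somewhere to spill them; the SysV and 32-bit ABIs
   reserve nothing.  */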
1488
1489 /* We add this as a workaround in order to use libc_has_function
1490 hook in i386.md. */
1491 bool
1492 ix86_libc_has_function (enum function_class fn_class)
1493 {
1494 return targetm.libc_has_function (fn_class);
1495 }
1496
1497 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
1498 specifying the call ABI used. */
1499 enum calling_abi
1500 ix86_function_type_abi (const_tree fntype)
1501 {
1502 enum calling_abi abi = ix86_abi;
1503
1504 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1505 return abi;
1506
1507 if (abi == SYSV_ABI
1508 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1509 {
1510 static int warned;
1511 if (TARGET_X32 && !warned)
1512 {
1513 error ("X32 does not support %<ms_abi%> attribute");
1514 warned = 1;
1515 }
1516
1517 abi = MS_ABI;
1518 }
1519 else if (abi == MS_ABI
1520 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1521 abi = SYSV_ABI;
1522
1523 return abi;
1524 }
1525
1526 enum calling_abi
1527 ix86_function_abi (const_tree fndecl)
1528 {
1529 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1530 }
1531
1532 /* Return SYSV_ABI or MS_ABI, depending on cfun,
1533 specifying the call ABI used. */
1534 enum calling_abi
1535 ix86_cfun_abi (void)
1536 {
1537 return cfun ? cfun->machine->call_abi : ix86_abi;
1538 }
1539
1540 bool
1541 ix86_function_ms_hook_prologue (const_tree fn)
1542 {
1543 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1544 {
1545 if (decl_function_context (fn) != NULL_TREE)
1546 error_at (DECL_SOURCE_LOCATION (fn),
1547 "%<ms_hook_prologue%> attribute is not compatible "
1548 "with nested function");
1549 else
1550 return true;
1551 }
1552 return false;
1553 }
1554
1555 bool
1556 ix86_function_naked (const_tree fn)
1557 {
1558 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1559 return true;
1560
1561 return false;
1562 }
1563
1564 /* Write the extra assembler code needed to declare a function properly. */
1565
1566 void
1567 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
1568 tree decl)
1569 {
1570 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1571
1572 if (is_ms_hook)
1573 {
1574 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1575 unsigned int filler_cc = 0xcccccccc;
1576
1577 for (i = 0; i < filler_count; i += 4)
1578 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
1579 }
1580
1581 #ifdef SUBTARGET_ASM_UNWIND_INIT
1582 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
1583 #endif
1584
1585 ASM_OUTPUT_LABEL (asm_out_file, fname);
1586
1587 /* Output magic byte marker, if hot-patch attribute is set. */
1588 if (is_ms_hook)
1589 {
1590 if (TARGET_64BIT)
1591 {
1592 /* leaq [%rsp + 0], %rsp */
1593 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1594 asm_out_file);
1595 }
1596 else
1597 {
1598 /* movl.s %edi, %edi
1599 push %ebp
1600 movl.s %esp, %ebp */
1601 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
1602 }
1603 }
1604 }
1605
1606 /* Implementation of the call ABI switching target hook. The call
1607 register sets specific to FNDECL are set up here. See also
1608 ix86_conditional_register_usage for more details. */
1609 void
1610 ix86_call_abi_override (const_tree fndecl)
1611 {
1612 cfun->machine->call_abi = ix86_function_abi (fndecl);
1613 }
1614
1615 /* Return true if a pseudo register should be created and used to hold
1616 the GOT address for PIC code. */
1617 bool
1618 ix86_use_pseudo_pic_reg (void)
1619 {
1620 if ((TARGET_64BIT
1621 && (ix86_cmodel == CM_SMALL_PIC
1622 || TARGET_PECOFF))
1623 || !flag_pic)
1624 return false;
1625 return true;
1626 }
1627
1628 /* Initialize large model PIC register. */
1629
1630 static void
1631 ix86_init_large_pic_reg (unsigned int tmp_regno)
1632 {
1633 rtx_code_label *label;
1634 rtx tmp_reg;
1635
1636 gcc_assert (Pmode == DImode);
1637 label = gen_label_rtx ();
1638 emit_label (label);
1639 LABEL_PRESERVE_P (label) = 1;
1640 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1641 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1642 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1643 label));
1644 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1645 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1646 const char *name = LABEL_NAME (label);
1647 PUT_CODE (label, NOTE);
1648 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1649 NOTE_DELETED_LABEL_NAME (label) = name;
1650 }
1651
1652 /* Create and initialize PIC register if required. */
1653 static void
1654 ix86_init_pic_reg (void)
1655 {
1656 edge entry_edge;
1657 rtx_insn *seq;
1658
1659 if (!ix86_use_pseudo_pic_reg ())
1660 return;
1661
1662 start_sequence ();
1663
1664 if (TARGET_64BIT)
1665 {
1666 if (ix86_cmodel == CM_LARGE_PIC)
1667 ix86_init_large_pic_reg (R11_REG);
1668 else
1669 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1670 }
1671 else
1672 {
1673 /* If there is a future mcount call in the function, it is more profitable
1674 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1675 rtx reg = crtl->profile
1676 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1677 : pic_offset_table_rtx;
1678 rtx_insn *insn = emit_insn (gen_set_got (reg));
1679 RTX_FRAME_RELATED_P (insn) = 1;
1680 if (crtl->profile)
1681 emit_move_insn (pic_offset_table_rtx, reg);
1682 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1683 }
1684
1685 seq = get_insns ();
1686 end_sequence ();
1687
1688 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1689 insert_insn_on_edge (seq, entry_edge);
1690 commit_one_edge_insertion (entry_edge);
1691 }
1692
1693 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1694 for a call to a function whose data type is FNTYPE.
1695 For a library call, FNTYPE is 0. */
1696
1697 void
1698 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1699 tree fntype, /* tree ptr for function decl */
1700 rtx libname, /* SYMBOL_REF of library name or 0 */
1701 tree fndecl,
1702 int caller)
1703 {
1704 struct cgraph_local_info *i = NULL;
1705 struct cgraph_node *target = NULL;
1706
1707 memset (cum, 0, sizeof (*cum));
1708
1709 if (fndecl)
1710 {
1711 target = cgraph_node::get (fndecl);
1712 if (target)
1713 {
1714 target = target->function_symbol ();
1715 i = cgraph_node::local_info (target->decl);
1716 cum->call_abi = ix86_function_abi (target->decl);
1717 }
1718 else
1719 cum->call_abi = ix86_function_abi (fndecl);
1720 }
1721 else
1722 cum->call_abi = ix86_function_type_abi (fntype);
1723
1724 cum->caller = caller;
1725
1726 /* Set up the number of registers to use for passing arguments. */
1727 cum->nregs = ix86_regparm;
1728 if (TARGET_64BIT)
1729 {
1730 cum->nregs = (cum->call_abi == SYSV_ABI
1731 ? X86_64_REGPARM_MAX
1732 : X86_64_MS_REGPARM_MAX);
1733 }
1734 if (TARGET_SSE)
1735 {
1736 cum->sse_nregs = SSE_REGPARM_MAX;
1737 if (TARGET_64BIT)
1738 {
1739 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1740 ? X86_64_SSE_REGPARM_MAX
1741 : X86_64_MS_SSE_REGPARM_MAX);
1742 }
1743 }
1744 if (TARGET_MMX)
1745 cum->mmx_nregs = MMX_REGPARM_MAX;
1746 cum->warn_avx512f = true;
1747 cum->warn_avx = true;
1748 cum->warn_sse = true;
1749 cum->warn_mmx = true;
1750
1751 /* Because the type might mismatch between caller and callee, we need to
1752 use the actual type of the function for local calls.
1753 FIXME: cgraph_analyze can be told to actually record whether a function uses
1754 va_start, so for local functions maybe_vaarg can be made more aggressive,
1755 helping K&R code.
1756 FIXME: once the type system is fixed, we won't need this code anymore. */
1757 if (i && i->local && i->can_change_signature)
1758 fntype = TREE_TYPE (target->decl);
1759 cum->stdarg = stdarg_p (fntype);
1760 cum->maybe_vaarg = (fntype
1761 ? (!prototype_p (fntype) || stdarg_p (fntype))
1762 : !libname);
1763
1764 cum->decl = fndecl;
1765
1766 cum->warn_empty = !warn_abi || cum->stdarg;
1767 if (!cum->warn_empty && fntype)
1768 {
1769 function_args_iterator iter;
1770 tree argtype;
1771 bool seen_empty_type = false;
1772 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1773 {
1774 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1775 break;
1776 if (TYPE_EMPTY_P (argtype))
1777 seen_empty_type = true;
1778 else if (seen_empty_type)
1779 {
1780 cum->warn_empty = true;
1781 break;
1782 }
1783 }
1784 }
1785
1786 if (!TARGET_64BIT)
1787 {
1788 /* If there are variable arguments, then we won't pass anything
1789 in registers in 32-bit mode. */
1790 if (stdarg_p (fntype))
1791 {
1792 cum->nregs = 0;
1793 /* Since variable arguments are always passed on the stack in
1794 32-bit mode, there is a scratch register available for an
1795 indirect sibcall. */
1796 cfun->machine->arg_reg_available = true;
1797 cum->sse_nregs = 0;
1798 cum->mmx_nregs = 0;
1799 cum->warn_avx512f = false;
1800 cum->warn_avx = false;
1801 cum->warn_sse = false;
1802 cum->warn_mmx = false;
1803 return;
1804 }
1805
1806 /* Use the ecx and edx registers if the function has the fastcall
1807 attribute, otherwise look for regparm information. */
1808 if (fntype)
1809 {
1810 unsigned int ccvt = ix86_get_callcvt (fntype);
1811 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1812 {
1813 cum->nregs = 1;
1814 cum->fastcall = 1; /* Same first register as in fastcall. */
1815 }
1816 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1817 {
1818 cum->nregs = 2;
1819 cum->fastcall = 1;
1820 }
1821 else
1822 cum->nregs = ix86_function_regparm (fntype, fndecl);
1823 }
1824
1825 /* Set up the number of SSE registers used for passing SFmode
1826 and DFmode arguments. Warn for mismatching ABI. */
1827 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1828 }
1829
1830 cfun->machine->arg_reg_available = (cum->nregs > 0);
1831 }
1832
1833 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1834 But in the case of vector types, it is some vector mode.
1835
1836 When we have only some of our vector isa extensions enabled, then there
1837 are some modes for which vector_mode_supported_p is false. For these
1838 modes, the generic vector support in gcc will choose some non-vector mode
1839 in order to implement the type. By computing the natural mode, we'll
1840 select the proper ABI location for the operand and not depend on whatever
1841 the middle-end decides to do with these vector types.
1842
1843 The middle-end can't deal with vector types > 16 bytes. In this
1844 case, we return the original mode and warn about the ABI change if
1845 CUM isn't NULL.
1846
1847 If IN_RETURN is true, warn about the ABI change if the vector mode
1848 isn't available for the function return value. */
1849
1850 static machine_mode
1851 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1852 bool in_return)
1853 {
1854 machine_mode mode = TYPE_MODE (type);
1855
1856 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1857 {
1858 HOST_WIDE_INT size = int_size_in_bytes (type);
1859 if ((size == 8 || size == 16 || size == 32 || size == 64)
1860 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1861 && TYPE_VECTOR_SUBPARTS (type) > 1)
1862 {
1863 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1864
1865 /* There are no XFmode vector modes. */
1866 if (innermode == XFmode)
1867 return mode;
1868
1869 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1870 mode = MIN_MODE_VECTOR_FLOAT;
1871 else
1872 mode = MIN_MODE_VECTOR_INT;
1873
1874 /* Get the mode which has this inner mode and number of units. */
1875 FOR_EACH_MODE_FROM (mode, mode)
1876 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1877 && GET_MODE_INNER (mode) == innermode)
1878 {
1879 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1880 {
1881 static bool warnedavx512f;
1882 static bool warnedavx512f_ret;
1883
1884 if (cum && cum->warn_avx512f && !warnedavx512f)
1885 {
1886 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1887 "without AVX512F enabled changes the ABI"))
1888 warnedavx512f = true;
1889 }
1890 else if (in_return && !warnedavx512f_ret)
1891 {
1892 if (warning (OPT_Wpsabi, "AVX512F vector return "
1893 "without AVX512F enabled changes the ABI"))
1894 warnedavx512f_ret = true;
1895 }
1896
1897 return TYPE_MODE (type);
1898 }
1899 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1900 {
1901 static bool warnedavx;
1902 static bool warnedavx_ret;
1903
1904 if (cum && cum->warn_avx && !warnedavx)
1905 {
1906 if (warning (OPT_Wpsabi, "AVX vector argument "
1907 "without AVX enabled changes the ABI"))
1908 warnedavx = true;
1909 }
1910 else if (in_return && !warnedavx_ret)
1911 {
1912 if (warning (OPT_Wpsabi, "AVX vector return "
1913 "without AVX enabled changes the ABI"))
1914 warnedavx_ret = true;
1915 }
1916
1917 return TYPE_MODE (type);
1918 }
1919 else if (((size == 8 && TARGET_64BIT) || size == 16)
1920 && !TARGET_SSE
1921 && !TARGET_IAMCU)
1922 {
1923 static bool warnedsse;
1924 static bool warnedsse_ret;
1925
1926 if (cum && cum->warn_sse && !warnedsse)
1927 {
1928 if (warning (OPT_Wpsabi, "SSE vector argument "
1929 "without SSE enabled changes the ABI"))
1930 warnedsse = true;
1931 }
1932 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1933 {
1934 if (warning (OPT_Wpsabi, "SSE vector return "
1935 "without SSE enabled changes the ABI"))
1936 warnedsse_ret = true;
1937 }
1938 }
1939 else if ((size == 8 && !TARGET_64BIT)
1940 && (!cfun
1941 || cfun->machine->func_type == TYPE_NORMAL)
1942 && !TARGET_MMX
1943 && !TARGET_IAMCU)
1944 {
1945 static bool warnedmmx;
1946 static bool warnedmmx_ret;
1947
1948 if (cum && cum->warn_mmx && !warnedmmx)
1949 {
1950 if (warning (OPT_Wpsabi, "MMX vector argument "
1951 "without MMX enabled changes the ABI"))
1952 warnedmmx = true;
1953 }
1954 else if (in_return && !warnedmmx_ret)
1955 {
1956 if (warning (OPT_Wpsabi, "MMX vector return "
1957 "without MMX enabled changes the ABI"))
1958 warnedmmx_ret = true;
1959 }
1960 }
1961 return mode;
1962 }
1963
1964 gcc_unreachable ();
1965 }
1966 }
1967
1968 return mode;
1969 }
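/* Illustrative example: for a user vector type such as
   typedef int v8si __attribute__ ((vector_size (32)));
   TYPE_MODE is not a vector mode when AVX is disabled, so the search above
   selects V8SImode.  With -mavx the function simply returns V8SImode; without
   it the original mode is kept and a single -Wpsabi note about the AVX ABI
   change is emitted for the argument or return value.  */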
1970
1971 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1972 this may not agree with the mode that the type system has chosen for the
1973 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1974 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
1975
1976 static rtx
1977 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1978 unsigned int regno)
1979 {
1980 rtx tmp;
1981
1982 if (orig_mode != BLKmode)
1983 tmp = gen_rtx_REG (orig_mode, regno);
1984 else
1985 {
1986 tmp = gen_rtx_REG (mode, regno);
1987 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
1988 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
1989 }
1990
1991 return tmp;
1992 }
1993
1994 /* x86-64 register passing implementation. See the x86-64 ABI for details.
1995 The goal of this code is to classify each eightbyte of an incoming argument
1996 by register class and assign registers accordingly. */
1997
1998 /* Return the union class of CLASS1 and CLASS2.
1999 See the x86-64 PS ABI for details. */
2000
2001 static enum x86_64_reg_class
2002 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2003 {
2004 /* Rule #1: If both classes are equal, this is the resulting class. */
2005 if (class1 == class2)
2006 return class1;
2007
2008 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2009 the other class. */
2010 if (class1 == X86_64_NO_CLASS)
2011 return class2;
2012 if (class2 == X86_64_NO_CLASS)
2013 return class1;
2014
2015 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2016 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2017 return X86_64_MEMORY_CLASS;
2018
2019 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2020 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2021 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2022 return X86_64_INTEGERSI_CLASS;
2023 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2024 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2025 return X86_64_INTEGER_CLASS;
2026
2027 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2028 MEMORY is used. */
2029 if (class1 == X86_64_X87_CLASS
2030 || class1 == X86_64_X87UP_CLASS
2031 || class1 == X86_64_COMPLEX_X87_CLASS
2032 || class2 == X86_64_X87_CLASS
2033 || class2 == X86_64_X87UP_CLASS
2034 || class2 == X86_64_COMPLEX_X87_CLASS)
2035 return X86_64_MEMORY_CLASS;
2036
2037 /* Rule #6: Otherwise class SSE is used. */
2038 return X86_64_SSE_CLASS;
2039 }
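/* A few illustrative results of the rules above (class names abbreviated;
   the X86_64_ prefix and _CLASS suffix are omitted):
   merge_classes (NO_CLASS, SSE)      -> SSE        (rule #2)
   merge_classes (INTEGERSI, SSESF)   -> INTEGERSI  (rule #4, first test)
   merge_classes (SSE, INTEGER)       -> INTEGER    (rule #4)
   merge_classes (SSE, X87)           -> MEMORY     (rule #5)
   merge_classes (SSESF, SSEDF)       -> SSE        (rule #6)  */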
2040
2041 /* Classify the argument of type TYPE and mode MODE.
2042 CLASSES will be filled by the register class used to pass each word
2043 of the operand. The number of words is returned. In case the parameter
2044 should be passed in memory, 0 is returned. As a special case for zero
2045 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2046
2047 BIT_OFFSET is used internally for handling records and specifies the
2048 offset in bits modulo 512 to avoid overflow cases.
2049
2050 See the x86-64 PS ABI for details.
2051 */
2052
2053 static int
2054 classify_argument (machine_mode mode, const_tree type,
2055 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2056 {
2057 HOST_WIDE_INT bytes
2058 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2059 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2060
2061 /* Variable sized entities are always passed/returned in memory. */
2062 if (bytes < 0)
2063 return 0;
2064
2065 if (mode != VOIDmode)
2066 {
2067 /* The value of "named" doesn't matter. */
2068 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2069 if (targetm.calls.must_pass_in_stack (arg))
2070 return 0;
2071 }
2072
2073 if (type && AGGREGATE_TYPE_P (type))
2074 {
2075 int i;
2076 tree field;
2077 enum x86_64_reg_class subclasses[MAX_CLASSES];
2078
2079 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2080 if (bytes > 64)
2081 return 0;
2082
2083 for (i = 0; i < words; i++)
2084 classes[i] = X86_64_NO_CLASS;
2085
2086 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2087 signal the memory class, so handle it as a special case. */
2088 if (!words)
2089 {
2090 classes[0] = X86_64_NO_CLASS;
2091 return 1;
2092 }
2093
2094 /* Classify each field of record and merge classes. */
2095 switch (TREE_CODE (type))
2096 {
2097 case RECORD_TYPE:
2098 /* And now merge the fields of structure. */
2099 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2100 {
2101 if (TREE_CODE (field) == FIELD_DECL)
2102 {
2103 int num;
2104
2105 if (TREE_TYPE (field) == error_mark_node)
2106 continue;
2107
2108 /* Bitfields are always classified as integer. Handle them
2109 early, since later code would consider them to be
2110 misaligned integers. */
2111 if (DECL_BIT_FIELD (field))
2112 {
2113 for (i = (int_bit_position (field)
2114 + (bit_offset % 64)) / 8 / 8;
2115 i < ((int_bit_position (field) + (bit_offset % 64))
2116 + tree_to_shwi (DECL_SIZE (field))
2117 + 63) / 8 / 8; i++)
2118 classes[i]
2119 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2120 }
2121 else
2122 {
2123 int pos;
2124
2125 type = TREE_TYPE (field);
2126
2127 /* Flexible array member is ignored. */
2128 if (TYPE_MODE (type) == BLKmode
2129 && TREE_CODE (type) == ARRAY_TYPE
2130 && TYPE_SIZE (type) == NULL_TREE
2131 && TYPE_DOMAIN (type) != NULL_TREE
2132 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2133 == NULL_TREE))
2134 {
2135 static bool warned;
2136
2137 if (!warned && warn_psabi)
2138 {
2139 warned = true;
2140 inform (input_location,
2141 "the ABI of passing struct with"
2142 " a flexible array member has"
2143 " changed in GCC 4.4");
2144 }
2145 continue;
2146 }
2147 num = classify_argument (TYPE_MODE (type), type,
2148 subclasses,
2149 (int_bit_position (field)
2150 + bit_offset) % 512);
2151 if (!num)
2152 return 0;
2153 pos = (int_bit_position (field)
2154 + (bit_offset % 64)) / 8 / 8;
2155 for (i = 0; i < num && (i + pos) < words; i++)
2156 classes[i + pos]
2157 = merge_classes (subclasses[i], classes[i + pos]);
2158 }
2159 }
2160 }
2161 break;
2162
2163 case ARRAY_TYPE:
2164 /* Arrays are handled as small records. */
2165 {
2166 int num;
2167 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2168 TREE_TYPE (type), subclasses, bit_offset);
2169 if (!num)
2170 return 0;
2171
2172 /* The partial classes are now full classes. */
2173 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2174 subclasses[0] = X86_64_SSE_CLASS;
2175 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2176 && !((bit_offset % 64) == 0 && bytes == 4))
2177 subclasses[0] = X86_64_INTEGER_CLASS;
2178
2179 for (i = 0; i < words; i++)
2180 classes[i] = subclasses[i % num];
2181
2182 break;
2183 }
2184 case UNION_TYPE:
2185 case QUAL_UNION_TYPE:
2186 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2187
2188 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2189 {
2190 if (TREE_CODE (field) == FIELD_DECL)
2191 {
2192 int num;
2193
2194 if (TREE_TYPE (field) == error_mark_node)
2195 continue;
2196
2197 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2198 TREE_TYPE (field), subclasses,
2199 bit_offset);
2200 if (!num)
2201 return 0;
2202 for (i = 0; i < num && i < words; i++)
2203 classes[i] = merge_classes (subclasses[i], classes[i]);
2204 }
2205 }
2206 break;
2207
2208 default:
2209 gcc_unreachable ();
2210 }
2211
2212 if (words > 2)
2213 {
2214 /* When the size is > 16 bytes, if the first class isn't
2215 X86_64_SSE_CLASS or any of the other classes isn't
2216 X86_64_SSEUP_CLASS, everything should be passed in
2217 memory. */
2218 if (classes[0] != X86_64_SSE_CLASS)
2219 return 0;
2220
2221 for (i = 1; i < words; i++)
2222 if (classes[i] != X86_64_SSEUP_CLASS)
2223 return 0;
2224 }
2225
2226 /* Final merger cleanup. */
2227 for (i = 0; i < words; i++)
2228 {
2229 /* If one class is MEMORY, everything should be passed in
2230 memory. */
2231 if (classes[i] == X86_64_MEMORY_CLASS)
2232 return 0;
2233
2234 /* The X86_64_SSEUP_CLASS should always be preceded by
2235 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2236 if (classes[i] == X86_64_SSEUP_CLASS
2237 && classes[i - 1] != X86_64_SSE_CLASS
2238 && classes[i - 1] != X86_64_SSEUP_CLASS)
2239 {
2240 /* The first one should never be X86_64_SSEUP_CLASS. */
2241 gcc_assert (i != 0);
2242 classes[i] = X86_64_SSE_CLASS;
2243 }
2244
2245 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2246 everything should be passed in memory. */
2247 if (classes[i] == X86_64_X87UP_CLASS
2248 && (classes[i - 1] != X86_64_X87_CLASS))
2249 {
2250 static bool warned;
2251
2252 /* The first one should never be X86_64_X87UP_CLASS. */
2253 gcc_assert (i != 0);
2254 if (!warned && warn_psabi)
2255 {
2256 warned = true;
2257 inform (input_location,
2258 "the ABI of passing union with %<long double%>"
2259 " has changed in GCC 4.4");
2260 }
2261 return 0;
2262 }
2263 }
2264 return words;
2265 }
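/* Worked example (illustrative): for
   struct s { double d; int i; };
   the size is 16 bytes, so words == 2.  The double in the first eightbyte
   classifies as an SSE class and the int in the second eightbyte as an
   integer class, so classify_argument returns 2 and the struct is passed in
   one SSE register plus one general-purpose register rather than in memory.  */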
2266
2267 /* Compute the alignment needed. We align all types to their natural
2268 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2269 if (mode != VOIDmode && mode != BLKmode)
2270 {
2271 int mode_alignment = GET_MODE_BITSIZE (mode);
2272
2273 if (mode == XFmode)
2274 mode_alignment = 128;
2275 else if (mode == XCmode)
2276 mode_alignment = 256;
2277 if (COMPLEX_MODE_P (mode))
2278 mode_alignment /= 2;
2279 /* Misaligned fields are always returned in memory. */
2280 if (bit_offset % mode_alignment)
2281 return 0;
2282 }
2283
2284 /* For V1xx modes, just use the base mode. */
2285 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2286 && GET_MODE_UNIT_SIZE (mode) == bytes)
2287 mode = GET_MODE_INNER (mode);
2288
2289 /* Classification of atomic types. */
2290 switch (mode)
2291 {
2292 case E_SDmode:
2293 case E_DDmode:
2294 classes[0] = X86_64_SSE_CLASS;
2295 return 1;
2296 case E_TDmode:
2297 classes[0] = X86_64_SSE_CLASS;
2298 classes[1] = X86_64_SSEUP_CLASS;
2299 return 2;
2300 case E_DImode:
2301 case E_SImode:
2302 case E_HImode:
2303 case E_QImode:
2304 case E_CSImode:
2305 case E_CHImode:
2306 case E_CQImode:
2307 {
2308 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2309
2310 /* Analyze last 128 bits only. */
2311 size = (size - 1) & 0x7f;
2312
2313 if (size < 32)
2314 {
2315 classes[0] = X86_64_INTEGERSI_CLASS;
2316 return 1;
2317 }
2318 else if (size < 64)
2319 {
2320 classes[0] = X86_64_INTEGER_CLASS;
2321 return 1;
2322 }
2323 else if (size < 64+32)
2324 {
2325 classes[0] = X86_64_INTEGER_CLASS;
2326 classes[1] = X86_64_INTEGERSI_CLASS;
2327 return 2;
2328 }
2329 else if (size < 64+64)
2330 {
2331 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2332 return 2;
2333 }
2334 else
2335 gcc_unreachable ();
2336 }
2337 case E_CDImode:
2338 case E_TImode:
2339 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2340 return 2;
2341 case E_COImode:
2342 case E_OImode:
2343 /* OImode shouldn't be used directly. */
2344 gcc_unreachable ();
2345 case E_CTImode:
2346 return 0;
2347 case E_SFmode:
2348 if (!(bit_offset % 64))
2349 classes[0] = X86_64_SSESF_CLASS;
2350 else
2351 classes[0] = X86_64_SSE_CLASS;
2352 return 1;
2353 case E_DFmode:
2354 classes[0] = X86_64_SSEDF_CLASS;
2355 return 1;
2356 case E_XFmode:
2357 classes[0] = X86_64_X87_CLASS;
2358 classes[1] = X86_64_X87UP_CLASS;
2359 return 2;
2360 case E_TFmode:
2361 classes[0] = X86_64_SSE_CLASS;
2362 classes[1] = X86_64_SSEUP_CLASS;
2363 return 2;
2364 case E_SCmode:
2365 classes[0] = X86_64_SSE_CLASS;
2366 if (!(bit_offset % 64))
2367 return 1;
2368 else
2369 {
2370 static bool warned;
2371
2372 if (!warned && warn_psabi)
2373 {
2374 warned = true;
2375 inform (input_location,
2376 "the ABI of passing structure with %<complex float%>"
2377 " member has changed in GCC 4.4");
2378 }
2379 classes[1] = X86_64_SSESF_CLASS;
2380 return 2;
2381 }
2382 case E_DCmode:
2383 classes[0] = X86_64_SSEDF_CLASS;
2384 classes[1] = X86_64_SSEDF_CLASS;
2385 return 2;
2386 case E_XCmode:
2387 classes[0] = X86_64_COMPLEX_X87_CLASS;
2388 return 1;
2389 case E_TCmode:
2390 /* This mode is larger than 16 bytes. */
2391 return 0;
2392 case E_V8SFmode:
2393 case E_V8SImode:
2394 case E_V32QImode:
2395 case E_V16HImode:
2396 case E_V4DFmode:
2397 case E_V4DImode:
2398 classes[0] = X86_64_SSE_CLASS;
2399 classes[1] = X86_64_SSEUP_CLASS;
2400 classes[2] = X86_64_SSEUP_CLASS;
2401 classes[3] = X86_64_SSEUP_CLASS;
2402 return 4;
2403 case E_V8DFmode:
2404 case E_V16SFmode:
2405 case E_V8DImode:
2406 case E_V16SImode:
2407 case E_V32HImode:
2408 case E_V64QImode:
2409 classes[0] = X86_64_SSE_CLASS;
2410 classes[1] = X86_64_SSEUP_CLASS;
2411 classes[2] = X86_64_SSEUP_CLASS;
2412 classes[3] = X86_64_SSEUP_CLASS;
2413 classes[4] = X86_64_SSEUP_CLASS;
2414 classes[5] = X86_64_SSEUP_CLASS;
2415 classes[6] = X86_64_SSEUP_CLASS;
2416 classes[7] = X86_64_SSEUP_CLASS;
2417 return 8;
2418 case E_V4SFmode:
2419 case E_V4SImode:
2420 case E_V16QImode:
2421 case E_V8HImode:
2422 case E_V2DFmode:
2423 case E_V2DImode:
2424 classes[0] = X86_64_SSE_CLASS;
2425 classes[1] = X86_64_SSEUP_CLASS;
2426 return 2;
2427 case E_V1TImode:
2428 case E_V1DImode:
2429 case E_V2SFmode:
2430 case E_V2SImode:
2431 case E_V4HImode:
2432 case E_V8QImode:
2433 classes[0] = X86_64_SSE_CLASS;
2434 return 1;
2435 case E_BLKmode:
2436 case E_VOIDmode:
2437 return 0;
2438 default:
2439 gcc_assert (VECTOR_MODE_P (mode));
2440
2441 if (bytes > 16)
2442 return 0;
2443
2444 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2445
2446 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2447 classes[0] = X86_64_INTEGERSI_CLASS;
2448 else
2449 classes[0] = X86_64_INTEGER_CLASS;
2450 classes[1] = X86_64_INTEGER_CLASS;
2451 return 1 + (bytes > 8);
2452 }
2453 }
2454
2455 /* Examine the argument and set the number of registers required in each
2456 class. Return true iff the parameter should be passed in memory. */
2457
2458 static bool
2459 examine_argument (machine_mode mode, const_tree type, int in_return,
2460 int *int_nregs, int *sse_nregs)
2461 {
2462 enum x86_64_reg_class regclass[MAX_CLASSES];
2463 int n = classify_argument (mode, type, regclass, 0);
2464
2465 *int_nregs = 0;
2466 *sse_nregs = 0;
2467
2468 if (!n)
2469 return true;
2470 for (n--; n >= 0; n--)
2471 switch (regclass[n])
2472 {
2473 case X86_64_INTEGER_CLASS:
2474 case X86_64_INTEGERSI_CLASS:
2475 (*int_nregs)++;
2476 break;
2477 case X86_64_SSE_CLASS:
2478 case X86_64_SSESF_CLASS:
2479 case X86_64_SSEDF_CLASS:
2480 (*sse_nregs)++;
2481 break;
2482 case X86_64_NO_CLASS:
2483 case X86_64_SSEUP_CLASS:
2484 break;
2485 case X86_64_X87_CLASS:
2486 case X86_64_X87UP_CLASS:
2487 case X86_64_COMPLEX_X87_CLASS:
2488 if (!in_return)
2489 return true;
2490 break;
2491 case X86_64_MEMORY_CLASS:
2492 gcc_unreachable ();
2493 }
2494
2495 return false;
2496 }
2497
2498 /* Construct container for the argument used by GCC interface. See
2499 FUNCTION_ARG for the detailed description. */
2500
2501 static rtx
2502 construct_container (machine_mode mode, machine_mode orig_mode,
2503 const_tree type, int in_return, int nintregs, int nsseregs,
2504 const int *intreg, int sse_regno)
2505 {
2506 /* The following variables hold the static issued_error state. */
2507 static bool issued_sse_arg_error;
2508 static bool issued_sse_ret_error;
2509 static bool issued_x87_ret_error;
2510
2511 machine_mode tmpmode;
2512 int bytes
2513 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2514 enum x86_64_reg_class regclass[MAX_CLASSES];
2515 int n;
2516 int i;
2517 int nexps = 0;
2518 int needed_sseregs, needed_intregs;
2519 rtx exp[MAX_CLASSES];
2520 rtx ret;
2521
2522 n = classify_argument (mode, type, regclass, 0);
2523 if (!n)
2524 return NULL;
2525 if (examine_argument (mode, type, in_return, &needed_intregs,
2526 &needed_sseregs))
2527 return NULL;
2528 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2529 return NULL;
2530
2531 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2532 some less clueful developer tries to use floating-point anyway. */
2533 if (needed_sseregs && !TARGET_SSE)
2534 {
2535 if (in_return)
2536 {
2537 if (!issued_sse_ret_error)
2538 {
2539 error ("SSE register return with SSE disabled");
2540 issued_sse_ret_error = true;
2541 }
2542 }
2543 else if (!issued_sse_arg_error)
2544 {
2545 error ("SSE register argument with SSE disabled");
2546 issued_sse_arg_error = true;
2547 }
2548 return NULL;
2549 }
2550
2551 /* Likewise, error if the ABI requires us to return values in the
2552 x87 registers and the user specified -mno-80387. */
2553 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2554 for (i = 0; i < n; i++)
2555 if (regclass[i] == X86_64_X87_CLASS
2556 || regclass[i] == X86_64_X87UP_CLASS
2557 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2558 {
2559 if (!issued_x87_ret_error)
2560 {
2561 error ("x87 register return with x87 disabled");
2562 issued_x87_ret_error = true;
2563 }
2564 return NULL;
2565 }
2566
2567 /* First construct simple cases. Avoid SCmode, since we want to use
2568 a single register to pass this type. */
2569 if (n == 1 && mode != SCmode)
2570 switch (regclass[0])
2571 {
2572 case X86_64_INTEGER_CLASS:
2573 case X86_64_INTEGERSI_CLASS:
2574 return gen_rtx_REG (mode, intreg[0]);
2575 case X86_64_SSE_CLASS:
2576 case X86_64_SSESF_CLASS:
2577 case X86_64_SSEDF_CLASS:
2578 if (mode != BLKmode)
2579 return gen_reg_or_parallel (mode, orig_mode,
2580 GET_SSE_REGNO (sse_regno));
2581 break;
2582 case X86_64_X87_CLASS:
2583 case X86_64_COMPLEX_X87_CLASS:
2584 return gen_rtx_REG (mode, FIRST_STACK_REG);
2585 case X86_64_NO_CLASS:
2586 /* Zero sized array, struct or class. */
2587 return NULL;
2588 default:
2589 gcc_unreachable ();
2590 }
2591 if (n == 2
2592 && regclass[0] == X86_64_SSE_CLASS
2593 && regclass[1] == X86_64_SSEUP_CLASS
2594 && mode != BLKmode)
2595 return gen_reg_or_parallel (mode, orig_mode,
2596 GET_SSE_REGNO (sse_regno));
2597 if (n == 4
2598 && regclass[0] == X86_64_SSE_CLASS
2599 && regclass[1] == X86_64_SSEUP_CLASS
2600 && regclass[2] == X86_64_SSEUP_CLASS
2601 && regclass[3] == X86_64_SSEUP_CLASS
2602 && mode != BLKmode)
2603 return gen_reg_or_parallel (mode, orig_mode,
2604 GET_SSE_REGNO (sse_regno));
2605 if (n == 8
2606 && regclass[0] == X86_64_SSE_CLASS
2607 && regclass[1] == X86_64_SSEUP_CLASS
2608 && regclass[2] == X86_64_SSEUP_CLASS
2609 && regclass[3] == X86_64_SSEUP_CLASS
2610 && regclass[4] == X86_64_SSEUP_CLASS
2611 && regclass[5] == X86_64_SSEUP_CLASS
2612 && regclass[6] == X86_64_SSEUP_CLASS
2613 && regclass[7] == X86_64_SSEUP_CLASS
2614 && mode != BLKmode)
2615 return gen_reg_or_parallel (mode, orig_mode,
2616 GET_SSE_REGNO (sse_regno));
2617 if (n == 2
2618 && regclass[0] == X86_64_X87_CLASS
2619 && regclass[1] == X86_64_X87UP_CLASS)
2620 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2621
2622 if (n == 2
2623 && regclass[0] == X86_64_INTEGER_CLASS
2624 && regclass[1] == X86_64_INTEGER_CLASS
2625 && (mode == CDImode || mode == TImode || mode == BLKmode)
2626 && intreg[0] + 1 == intreg[1])
2627 {
2628 if (mode == BLKmode)
2629 {
2630 /* Use TImode for BLKmode values in 2 integer registers. */
2631 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2632 gen_rtx_REG (TImode, intreg[0]),
2633 GEN_INT (0));
2634 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2635 XVECEXP (ret, 0, 0) = exp[0];
2636 return ret;
2637 }
2638 else
2639 return gen_rtx_REG (mode, intreg[0]);
2640 }
2641
2642 /* Otherwise figure out the entries of the PARALLEL. */
2643 for (i = 0; i < n; i++)
2644 {
2645 int pos;
2646
2647 switch (regclass[i])
2648 {
2649 case X86_64_NO_CLASS:
2650 break;
2651 case X86_64_INTEGER_CLASS:
2652 case X86_64_INTEGERSI_CLASS:
2653 /* Merge TImodes on aligned occasions here too. */
2654 if (i * 8 + 8 > bytes)
2655 {
2656 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2657 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2658 /* We've requested 24 bytes we
2659 don't have a mode for. Use DImode. */
2660 tmpmode = DImode;
2661 }
2662 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2663 tmpmode = SImode;
2664 else
2665 tmpmode = DImode;
2666 exp [nexps++]
2667 = gen_rtx_EXPR_LIST (VOIDmode,
2668 gen_rtx_REG (tmpmode, *intreg),
2669 GEN_INT (i*8));
2670 intreg++;
2671 break;
2672 case X86_64_SSESF_CLASS:
2673 exp [nexps++]
2674 = gen_rtx_EXPR_LIST (VOIDmode,
2675 gen_rtx_REG (SFmode,
2676 GET_SSE_REGNO (sse_regno)),
2677 GEN_INT (i*8));
2678 sse_regno++;
2679 break;
2680 case X86_64_SSEDF_CLASS:
2681 exp [nexps++]
2682 = gen_rtx_EXPR_LIST (VOIDmode,
2683 gen_rtx_REG (DFmode,
2684 GET_SSE_REGNO (sse_regno)),
2685 GEN_INT (i*8));
2686 sse_regno++;
2687 break;
2688 case X86_64_SSE_CLASS:
2689 pos = i;
2690 switch (n)
2691 {
2692 case 1:
2693 tmpmode = DImode;
2694 break;
2695 case 2:
2696 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2697 {
2698 tmpmode = TImode;
2699 i++;
2700 }
2701 else
2702 tmpmode = DImode;
2703 break;
2704 case 4:
2705 gcc_assert (i == 0
2706 && regclass[1] == X86_64_SSEUP_CLASS
2707 && regclass[2] == X86_64_SSEUP_CLASS
2708 && regclass[3] == X86_64_SSEUP_CLASS);
2709 tmpmode = OImode;
2710 i += 3;
2711 break;
2712 case 8:
2713 gcc_assert (i == 0
2714 && regclass[1] == X86_64_SSEUP_CLASS
2715 && regclass[2] == X86_64_SSEUP_CLASS
2716 && regclass[3] == X86_64_SSEUP_CLASS
2717 && regclass[4] == X86_64_SSEUP_CLASS
2718 && regclass[5] == X86_64_SSEUP_CLASS
2719 && regclass[6] == X86_64_SSEUP_CLASS
2720 && regclass[7] == X86_64_SSEUP_CLASS);
2721 tmpmode = XImode;
2722 i += 7;
2723 break;
2724 default:
2725 gcc_unreachable ();
2726 }
2727 exp [nexps++]
2728 = gen_rtx_EXPR_LIST (VOIDmode,
2729 gen_rtx_REG (tmpmode,
2730 GET_SSE_REGNO (sse_regno)),
2731 GEN_INT (pos*8));
2732 sse_regno++;
2733 break;
2734 default:
2735 gcc_unreachable ();
2736 }
2737 }
2738
2739 /* Empty aligned struct, union or class. */
2740 if (nexps == 0)
2741 return NULL;
2742
2743 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2744 for (i = 0; i < nexps; i++)
2745 XVECEXP (ret, 0, i) = exp [i];
2746 return ret;
2747 }
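/* Illustrative result: for the struct { double d; int i; } example used
   above, construct_container builds a PARALLEL roughly of the form
   (parallel [(expr_list (reg:DF xmm0) (const_int 0))
              (expr_list (reg:DI di)   (const_int 8))])
   i.e. the first eightbyte travels in an SSE register and the second in a
   general-purpose register at byte offset 8 (register names shown for the
   first SysV argument slot only).  */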
2748
2749 /* Update the data in CUM to advance over an argument of mode MODE
2750 and data type TYPE. (TYPE is null for libcalls where that information
2751 may not be available.)
2752
2753 Return the number of integer registers advanced over. */
2754
2755 static int
2756 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2757 const_tree type, HOST_WIDE_INT bytes,
2758 HOST_WIDE_INT words)
2759 {
2760 int res = 0;
2761 bool error_p = false;
2762
2763 if (TARGET_IAMCU)
2764 {
2765 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2766 bytes in registers. */
2767 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2768 goto pass_in_reg;
2769 return res;
2770 }
2771
2772 switch (mode)
2773 {
2774 default:
2775 break;
2776
2777 case E_BLKmode:
2778 if (bytes < 0)
2779 break;
2780 /* FALLTHRU */
2781
2782 case E_DImode:
2783 case E_SImode:
2784 case E_HImode:
2785 case E_QImode:
2786 pass_in_reg:
2787 cum->words += words;
2788 cum->nregs -= words;
2789 cum->regno += words;
2790 if (cum->nregs >= 0)
2791 res = words;
2792 if (cum->nregs <= 0)
2793 {
2794 cum->nregs = 0;
2795 cfun->machine->arg_reg_available = false;
2796 cum->regno = 0;
2797 }
2798 break;
2799
2800 case E_OImode:
2801 /* OImode shouldn't be used directly. */
2802 gcc_unreachable ();
2803
2804 case E_DFmode:
2805 if (cum->float_in_sse == -1)
2806 error_p = true;
2807 if (cum->float_in_sse < 2)
2808 break;
2809 /* FALLTHRU */
2810 case E_SFmode:
2811 if (cum->float_in_sse == -1)
2812 error_p = true;
2813 if (cum->float_in_sse < 1)
2814 break;
2815 /* FALLTHRU */
2816
2817 case E_V8SFmode:
2818 case E_V8SImode:
2819 case E_V64QImode:
2820 case E_V32HImode:
2821 case E_V16SImode:
2822 case E_V8DImode:
2823 case E_V16SFmode:
2824 case E_V8DFmode:
2825 case E_V32QImode:
2826 case E_V16HImode:
2827 case E_V4DFmode:
2828 case E_V4DImode:
2829 case E_TImode:
2830 case E_V16QImode:
2831 case E_V8HImode:
2832 case E_V4SImode:
2833 case E_V2DImode:
2834 case E_V4SFmode:
2835 case E_V2DFmode:
2836 if (!type || !AGGREGATE_TYPE_P (type))
2837 {
2838 cum->sse_words += words;
2839 cum->sse_nregs -= 1;
2840 cum->sse_regno += 1;
2841 if (cum->sse_nregs <= 0)
2842 {
2843 cum->sse_nregs = 0;
2844 cum->sse_regno = 0;
2845 }
2846 }
2847 break;
2848
2849 case E_V8QImode:
2850 case E_V4HImode:
2851 case E_V2SImode:
2852 case E_V2SFmode:
2853 case E_V1TImode:
2854 case E_V1DImode:
2855 if (!type || !AGGREGATE_TYPE_P (type))
2856 {
2857 cum->mmx_words += words;
2858 cum->mmx_nregs -= 1;
2859 cum->mmx_regno += 1;
2860 if (cum->mmx_nregs <= 0)
2861 {
2862 cum->mmx_nregs = 0;
2863 cum->mmx_regno = 0;
2864 }
2865 }
2866 break;
2867 }
2868 if (error_p)
2869 {
2870 cum->float_in_sse = 0;
2871 error ("calling %qD with SSE calling convention without "
2872 "SSE/SSE2 enabled", cum->decl);
2873 sorry ("this is a GCC bug that can be worked around by adding "
2874 "attribute used to function called");
2875 }
2876
2877 return res;
2878 }
2879
2880 static int
2881 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2882 const_tree type, HOST_WIDE_INT words, bool named)
2883 {
2884 int int_nregs, sse_nregs;
2885
2886 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
2887 if (!named && (VALID_AVX512F_REG_MODE (mode)
2888 || VALID_AVX256_REG_MODE (mode)))
2889 return 0;
2890
2891 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
2892 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2893 {
2894 cum->nregs -= int_nregs;
2895 cum->sse_nregs -= sse_nregs;
2896 cum->regno += int_nregs;
2897 cum->sse_regno += sse_nregs;
2898 return int_nregs;
2899 }
2900 else
2901 {
2902 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
2903 cum->words = ROUND_UP (cum->words, align);
2904 cum->words += words;
2905 return 0;
2906 }
2907 }
2908
2909 static int
2910 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
2911 HOST_WIDE_INT words)
2912 {
2913 /* Anything of a size other than 1, 2, 4 or 8 bytes should have been passed indirectly. */
2914 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
2915
2916 cum->words += words;
2917 if (cum->nregs > 0)
2918 {
2919 cum->nregs -= 1;
2920 cum->regno += 1;
2921 return 1;
2922 }
2923 return 0;
2924 }
2925
2926 /* Update the data in CUM to advance over argument ARG. */
2927
2928 static void
2929 ix86_function_arg_advance (cumulative_args_t cum_v,
2930 const function_arg_info &arg)
2931 {
2932 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2933 machine_mode mode = arg.mode;
2934 HOST_WIDE_INT bytes, words;
2935 int nregs;
2936
2937 /* The argument of an interrupt handler is a special case and is
2938 handled in ix86_function_arg. */
2939 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
2940 return;
2941
2942 bytes = arg.promoted_size_in_bytes ();
2943 words = CEIL (bytes, UNITS_PER_WORD);
2944
2945 if (arg.type)
2946 mode = type_natural_mode (arg.type, NULL, false);
2947
2948 if (TARGET_64BIT)
2949 {
2950 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
2951
2952 if (call_abi == MS_ABI)
2953 nregs = function_arg_advance_ms_64 (cum, bytes, words);
2954 else
2955 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
2956 arg.named);
2957 }
2958 else
2959 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
2960
2961 if (!nregs)
2962 {
2963 /* Track if there are outgoing arguments on stack. */
2964 if (cum->caller)
2965 cfun->machine->outgoing_args_on_stack = true;
2966 }
2967 }
2968
2969 /* Define where to put the arguments to a function.
2970 Value is zero to push the argument on the stack,
2971 or a hard register in which to store the argument.
2972
2973 MODE is the argument's machine mode.
2974 TYPE is the data type of the argument (as a tree).
2975 This is null for libcalls where that information may
2976 not be available.
2977 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2978 the preceding args and about the function being called.
2979 NAMED is nonzero if this argument is a named parameter
2980 (otherwise it is an extra parameter matching an ellipsis). */
2981
2982 static rtx
2983 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2984 machine_mode orig_mode, const_tree type,
2985 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
2986 {
2987 bool error_p = false;
2988
2989 /* Avoid the AL settings for the Unix64 ABI. */
2990 if (mode == VOIDmode)
2991 return constm1_rtx;
2992
2993 if (TARGET_IAMCU)
2994 {
2995 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2996 bytes in registers. */
2997 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2998 goto pass_in_reg;
2999 return NULL_RTX;
3000 }
3001
3002 switch (mode)
3003 {
3004 default:
3005 break;
3006
3007 case E_BLKmode:
3008 if (bytes < 0)
3009 break;
3010 /* FALLTHRU */
3011 case E_DImode:
3012 case E_SImode:
3013 case E_HImode:
3014 case E_QImode:
3015 pass_in_reg:
3016 if (words <= cum->nregs)
3017 {
3018 int regno = cum->regno;
3019
3020 /* Fastcall allocates the first two DWORD (SImode) or
3021 smaller arguments to ECX and EDX if the argument isn't
3022 an aggregate type. */
3023 if (cum->fastcall)
3024 {
3025 if (mode == BLKmode
3026 || mode == DImode
3027 || (type && AGGREGATE_TYPE_P (type)))
3028 break;
3029
3030 /* ECX, not EAX, is the first allocated register. */
3031 if (regno == AX_REG)
3032 regno = CX_REG;
3033 }
3034 return gen_rtx_REG (mode, regno);
3035 }
3036 break;
3037
3038 case E_DFmode:
3039 if (cum->float_in_sse == -1)
3040 error_p = true;
3041 if (cum->float_in_sse < 2)
3042 break;
3043 /* FALLTHRU */
3044 case E_SFmode:
3045 if (cum->float_in_sse == -1)
3046 error_p = true;
3047 if (cum->float_in_sse < 1)
3048 break;
3049 /* FALLTHRU */
3050 case E_TImode:
3051 /* In 32bit, we pass TImode in xmm registers. */
3052 case E_V16QImode:
3053 case E_V8HImode:
3054 case E_V4SImode:
3055 case E_V2DImode:
3056 case E_V4SFmode:
3057 case E_V2DFmode:
3058 if (!type || !AGGREGATE_TYPE_P (type))
3059 {
3060 if (cum->sse_nregs)
3061 return gen_reg_or_parallel (mode, orig_mode,
3062 cum->sse_regno + FIRST_SSE_REG);
3063 }
3064 break;
3065
3066 case E_OImode:
3067 case E_XImode:
3068 /* OImode and XImode shouldn't be used directly. */
3069 gcc_unreachable ();
3070
3071 case E_V64QImode:
3072 case E_V32HImode:
3073 case E_V16SImode:
3074 case E_V8DImode:
3075 case E_V16SFmode:
3076 case E_V8DFmode:
3077 case E_V8SFmode:
3078 case E_V8SImode:
3079 case E_V32QImode:
3080 case E_V16HImode:
3081 case E_V4DFmode:
3082 case E_V4DImode:
3083 if (!type || !AGGREGATE_TYPE_P (type))
3084 {
3085 if (cum->sse_nregs)
3086 return gen_reg_or_parallel (mode, orig_mode,
3087 cum->sse_regno + FIRST_SSE_REG);
3088 }
3089 break;
3090
3091 case E_V8QImode:
3092 case E_V4HImode:
3093 case E_V2SImode:
3094 case E_V2SFmode:
3095 case E_V1TImode:
3096 case E_V1DImode:
3097 if (!type || !AGGREGATE_TYPE_P (type))
3098 {
3099 if (cum->mmx_nregs)
3100 return gen_reg_or_parallel (mode, orig_mode,
3101 cum->mmx_regno + FIRST_MMX_REG);
3102 }
3103 break;
3104 }
3105 if (error_p)
3106 {
3107 cum->float_in_sse = 0;
3108 error ("calling %qD with SSE calling convention without "
3109 "SSE/SSE2 enabled", cum->decl);
3110 sorry ("this is a GCC bug that can be worked around by adding "
3111 "attribute used to function called");
3112 }
3113
3114 return NULL_RTX;
3115 }
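/* Illustrative fastcall example (hypothetical function f): for
   void __attribute__ ((fastcall)) f (int a, int b, int c);
   init_cumulative_args sets cum->nregs to 2 and cum->fastcall, so the code
   above places A in %ecx (the AX -> CX remapping), B in %edx, and C on the
   stack once both register slots have been used.  */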
3116
3117 static rtx
3118 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3119 machine_mode orig_mode, const_tree type, bool named)
3120 {
3121 /* Handle a hidden AL argument containing number of registers
3122 for varargs x86-64 functions. */
3123 if (mode == VOIDmode)
3124 return GEN_INT (cum->maybe_vaarg
3125 ? (cum->sse_nregs < 0
3126 ? X86_64_SSE_REGPARM_MAX
3127 : cum->sse_regno)
3128 : -1);
3129
3130 switch (mode)
3131 {
3132 default:
3133 break;
3134
3135 case E_V8SFmode:
3136 case E_V8SImode:
3137 case E_V32QImode:
3138 case E_V16HImode:
3139 case E_V4DFmode:
3140 case E_V4DImode:
3141 case E_V16SFmode:
3142 case E_V16SImode:
3143 case E_V64QImode:
3144 case E_V32HImode:
3145 case E_V8DFmode:
3146 case E_V8DImode:
3147 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
3148 if (!named)
3149 return NULL;
3150 break;
3151 }
3152
3153 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3154 cum->sse_nregs,
3155 &x86_64_int_parameter_registers [cum->regno],
3156 cum->sse_regno);
3157 }
3158
3159 static rtx
3160 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3161 machine_mode orig_mode, bool named,
3162 HOST_WIDE_INT bytes)
3163 {
3164 unsigned int regno;
3165
3166 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3167 We use the value -2 to specify that the current function call is MS ABI. */
3168 if (mode == VOIDmode)
3169 return GEN_INT (-2);
3170
3171 /* If we've run out of registers, it goes on the stack. */
3172 if (cum->nregs == 0)
3173 return NULL_RTX;
3174
3175 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3176
3177 /* Only floating point modes are passed in anything but integer regs. */
3178 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3179 {
3180 if (named)
3181 regno = cum->regno + FIRST_SSE_REG;
3182 else
3183 {
3184 rtx t1, t2;
3185
3186 /* Unnamed floating parameters are passed in both the
3187 SSE and integer registers. */
3188 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3189 t2 = gen_rtx_REG (mode, regno);
3190 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3191 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3192 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3193 }
3194 }
3195 /* Handle aggregate types passed in a register. */
3196 if (orig_mode == BLKmode)
3197 {
3198 if (bytes > 0 && bytes <= 8)
3199 mode = (bytes > 4 ? DImode : SImode);
3200 if (mode == BLKmode)
3201 mode = DImode;
3202 }
3203
3204 return gen_reg_or_parallel (mode, orig_mode, regno);
3205 }
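/* Illustrative example (hypothetical prototype): for a varargs call such as
   void f (const char *, ...);  f (fmt, 3.5);
   the unnamed double is not "named", so the code above returns a PARALLEL
   containing both the SSE register and the integer register for that slot
   (here %xmm1 and %rdx); the caller loads the value into both, as the
   Windows x64 convention expects for the callee's va_arg handling.  */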
3206
3207 /* Return where to put the arguments to a function.
3208 Return zero to push the argument on the stack, or a hard register
3209 in which to store the argument.
3210 ARG describes the argument while CUM gives information about the
3211 preceding args and about the function being called. */
3212
3213 static rtx
3214 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3215 {
3216 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3217 machine_mode mode = arg.mode;
3218 HOST_WIDE_INT bytes, words;
3219 rtx reg;
3220
3221 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3222 {
3223 gcc_assert (arg.type != NULL_TREE);
3224 if (POINTER_TYPE_P (arg.type))
3225 {
3226 /* This is the pointer argument. */
3227 gcc_assert (TYPE_MODE (arg.type) == Pmode);
3228 /* It is at -WORD(AP) in the current frame in interrupt and
3229 exception handlers. */
3230 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3231 }
3232 else
3233 {
3234 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3235 && TREE_CODE (arg.type) == INTEGER_TYPE
3236 && TYPE_MODE (arg.type) == word_mode);
3237 /* The error code is the word-mode integer argument at
3238 -2 * WORD(AP) in the current frame of the exception
3239 handler. */
3240 reg = gen_rtx_MEM (word_mode,
3241 plus_constant (Pmode,
3242 arg_pointer_rtx,
3243 -2 * UNITS_PER_WORD));
3244 }
3245 return reg;
3246 }
3247
3248 bytes = arg.promoted_size_in_bytes ();
3249 words = CEIL (bytes, UNITS_PER_WORD);
3250
3251 /* To simplify the code below, represent vector types with a vector mode
3252 even if MMX/SSE are not active. */
3253 if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
3254 mode = type_natural_mode (arg.type, cum, false);
3255
3256 if (TARGET_64BIT)
3257 {
3258 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3259
3260 if (call_abi == MS_ABI)
3261 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named, bytes);
3262 else
3263 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3264 }
3265 else
3266 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3267
3268 /* Track if there are outgoing arguments on stack. */
3269 if (reg == NULL_RTX && cum->caller)
3270 cfun->machine->outgoing_args_on_stack = true;
3271
3272 return reg;
3273 }
3274
3275 /* A C expression that indicates when an argument must be passed by
3276 reference. If nonzero for an argument, a copy of that argument is
3277 made in memory and a pointer to the argument is passed instead of
3278 the argument itself. The pointer is passed in whatever way is
3279 appropriate for passing a pointer to that type. */
3280
3281 static bool
3282 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3283 {
3284 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3285
3286 if (TARGET_64BIT)
3287 {
3288 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3289
3290 /* See Windows x64 Software Convention. */
3291 if (call_abi == MS_ABI)
3292 {
3293 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3294
3295 if (tree type = arg.type)
3296 {
3297 /* Arrays are passed by reference. */
3298 if (TREE_CODE (type) == ARRAY_TYPE)
3299 return true;
3300
3301 if (RECORD_OR_UNION_TYPE_P (type))
3302 {
3303 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3304 are passed by reference. */
3305 msize = int_size_in_bytes (type);
3306 }
3307 }
3308
3309 /* __m128 is passed by reference. */
3310 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3311 }
3312 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3313 return true;
3314 }
3315
3316 return false;
3317 }
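/* Illustrative consequences of the Windows x64 branch above:
   struct s8  { long long a; }   size 8  -> passed by value
   struct s12 { int a, b, c; }   size 12 -> passed by reference
   __m128                        size 16 -> passed by reference
   while on the SysV side only variable-sized types (int_size_in_bytes
   returning -1) are forced to be passed by reference here.  */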
3318
3319 /* Return true when TYPE should be 128bit aligned for 32bit argument
3320 passing ABI. XXX: This function is obsolete and is only used for
3321 checking psABI compatibility with previous versions of GCC. */
3322
3323 static bool
3324 ix86_compat_aligned_value_p (const_tree type)
3325 {
3326 machine_mode mode = TYPE_MODE (type);
3327 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3328 || mode == TDmode
3329 || mode == TFmode
3330 || mode == TCmode)
3331 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3332 return true;
3333 if (TYPE_ALIGN (type) < 128)
3334 return false;
3335
3336 if (AGGREGATE_TYPE_P (type))
3337 {
3338 /* Walk the aggregates recursively. */
3339 switch (TREE_CODE (type))
3340 {
3341 case RECORD_TYPE:
3342 case UNION_TYPE:
3343 case QUAL_UNION_TYPE:
3344 {
3345 tree field;
3346
3347 /* Walk all the structure fields. */
3348 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3349 {
3350 if (TREE_CODE (field) == FIELD_DECL
3351 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3352 return true;
3353 }
3354 break;
3355 }
3356
3357 case ARRAY_TYPE:
3358 /* Just for use if some language passes arrays by value. */
3359 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3360 return true;
3361 break;
3362
3363 default:
3364 gcc_unreachable ();
3365 }
3366 }
3367 return false;
3368 }
3369
3370 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3371 XXX: This function is obsolete and is only used for checking psABI
3372 compatibility with previous versions of GCC. */
3373
3374 static unsigned int
3375 ix86_compat_function_arg_boundary (machine_mode mode,
3376 const_tree type, unsigned int align)
3377 {
3378 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3379 natural boundaries. */
3380 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3381 {
3382 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3383 make an exception for SSE modes since these require 128bit
3384 alignment.
3385
3386 The handling here differs from field_alignment. ICC aligns MMX
3387 arguments to 4 byte boundaries, while structure fields are aligned
3388 to 8 byte boundaries. */
3389 if (!type)
3390 {
3391 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3392 align = PARM_BOUNDARY;
3393 }
3394 else
3395 {
3396 if (!ix86_compat_aligned_value_p (type))
3397 align = PARM_BOUNDARY;
3398 }
3399 }
3400 if (align > BIGGEST_ALIGNMENT)
3401 align = BIGGEST_ALIGNMENT;
3402 return align;
3403 }
3404
3405 /* Return true when TYPE should be 128bit aligned for 32bit argument
3406 passing ABI. */
3407
3408 static bool
3409 ix86_contains_aligned_value_p (const_tree type)
3410 {
3411 machine_mode mode = TYPE_MODE (type);
3412
3413 if (mode == XFmode || mode == XCmode)
3414 return false;
3415
3416 if (TYPE_ALIGN (type) < 128)
3417 return false;
3418
3419 if (AGGREGATE_TYPE_P (type))
3420 {
3421 /* Walk the aggregates recursively. */
3422 switch (TREE_CODE (type))
3423 {
3424 case RECORD_TYPE:
3425 case UNION_TYPE:
3426 case QUAL_UNION_TYPE:
3427 {
3428 tree field;
3429
3430 /* Walk all the structure fields. */
3431 for (field = TYPE_FIELDS (type);
3432 field;
3433 field = DECL_CHAIN (field))
3434 {
3435 if (TREE_CODE (field) == FIELD_DECL
3436 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3437 return true;
3438 }
3439 break;
3440 }
3441
3442 case ARRAY_TYPE:
3443 /* Just for use if some language passes arrays by value. */
3444 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3445 return true;
3446 break;
3447
3448 default:
3449 gcc_unreachable ();
3450 }
3451 }
3452 else
3453 return TYPE_ALIGN (type) >= 128;
3454
3455 return false;
3456 }
3457
3458 /* Gives the alignment boundary, in bits, of an argument with the
3459 specified mode and type. */
3460
3461 static unsigned int
3462 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3463 {
3464 unsigned int align;
3465 if (type)
3466 {
3467 /* Since the main variant type is used for the call, convert the
3468 type to its main variant. */
3469 type = TYPE_MAIN_VARIANT (type);
3470 align = TYPE_ALIGN (type);
3471 if (TYPE_EMPTY_P (type))
3472 return PARM_BOUNDARY;
3473 }
3474 else
3475 align = GET_MODE_ALIGNMENT (mode);
3476 if (align < PARM_BOUNDARY)
3477 align = PARM_BOUNDARY;
3478 else
3479 {
3480 static bool warned;
3481 unsigned int saved_align = align;
3482
3483 if (!TARGET_64BIT)
3484 {
3485 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3486 if (!type)
3487 {
3488 if (mode == XFmode || mode == XCmode)
3489 align = PARM_BOUNDARY;
3490 }
3491 else if (!ix86_contains_aligned_value_p (type))
3492 align = PARM_BOUNDARY;
3493
3494 if (align < 128)
3495 align = PARM_BOUNDARY;
3496 }
3497
3498 if (warn_psabi
3499 && !warned
3500 && align != ix86_compat_function_arg_boundary (mode, type,
3501 saved_align))
3502 {
3503 warned = true;
3504 inform (input_location,
3505 "the ABI for passing parameters with %d-byte"
3506 " alignment has changed in GCC 4.6",
3507 align / BITS_PER_UNIT);
3508 }
3509 }
3510
3511 return align;
3512 }
3513
3514 /* Return true if N is a possible register number of function value. */
3515
3516 static bool
3517 ix86_function_value_regno_p (const unsigned int regno)
3518 {
3519 switch (regno)
3520 {
3521 case AX_REG:
3522 return true;
3523 case DX_REG:
3524 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3525 case DI_REG:
3526 case SI_REG:
3527 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3528
3529 /* Complex values are returned in the %st(0)/%st(1) pair. */
3530 case ST0_REG:
3531 case ST1_REG:
3532 /* TODO: The function should depend on current function ABI but
3533 builtins.c would need updating then. Therefore we use the
3534 default ABI. */
3535 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3536 return false;
3537 return TARGET_FLOAT_RETURNS_IN_80387;
3538
3539 /* Complex values are returned in the %xmm0/%xmm1 pair. */
3540 case XMM0_REG:
3541 case XMM1_REG:
3542 return TARGET_SSE;
3543
3544 case MM0_REG:
3545 if (TARGET_MACHO || TARGET_64BIT)
3546 return false;
3547 return TARGET_MMX;
3548 }
3549
3550 return false;
3551 }
3552
3553 /* Define how to find the value returned by a function.
3554 VALTYPE is the data type of the value (as a tree).
3555 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3556 otherwise, FUNC is 0. */
3557
3558 static rtx
3559 function_value_32 (machine_mode orig_mode, machine_mode mode,
3560 const_tree fntype, const_tree fn)
3561 {
3562 unsigned int regno;
3563
3564 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3565 we normally prevent this case when mmx is not available. However
3566 some ABIs may require the result to be returned like DImode. */
3567 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3568 regno = FIRST_MMX_REG;
3569
3570 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3571 we prevent this case when sse is not available. However some ABIs
3572 may require the result to be returned like integer TImode. */
3573 else if (mode == TImode
3574 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3575 regno = FIRST_SSE_REG;
3576
3577 /* 32-byte vector modes in %ymm0. */
3578 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
3579 regno = FIRST_SSE_REG;
3580
3581 /* 64-byte vector modes in %zmm0. */
3582 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
3583 regno = FIRST_SSE_REG;
3584
3585 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3586 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
3587 regno = FIRST_FLOAT_REG;
3588 else
3589 /* Most things go in %eax. */
3590 regno = AX_REG;
3591
3592 /* Override FP return register with %xmm0 for local functions when
3593 SSE math is enabled or for functions with sseregparm attribute. */
3594 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
3595 {
3596 int sse_level = ix86_function_sseregparm (fntype, fn, false);
3597 if (sse_level == -1)
3598 {
3599 error ("calling %qD with SSE calling convention without "
3600 "SSE/SSE2 enabled", fn);
3601 sorry ("this is a GCC bug that can be worked around by adding "
3602 "attribute used to function called");
3603 }
3604 else if ((sse_level >= 1 && mode == SFmode)
3605 || (sse_level == 2 && mode == DFmode))
3606 regno = FIRST_SSE_REG;
3607 }
3608
3609 /* OImode shouldn't be used directly. */
3610 gcc_assert (mode != OImode);
3611
3612 return gen_rtx_REG (orig_mode, regno);
3613 }
3614
3615 static rtx
3616 function_value_64 (machine_mode orig_mode, machine_mode mode,
3617 const_tree valtype)
3618 {
3619 rtx ret;
3620
3621 /* Handle libcalls, which don't provide a type node. */
3622 if (valtype == NULL)
3623 {
3624 unsigned int regno;
3625
3626 switch (mode)
3627 {
3628 case E_SFmode:
3629 case E_SCmode:
3630 case E_DFmode:
3631 case E_DCmode:
3632 case E_TFmode:
3633 case E_SDmode:
3634 case E_DDmode:
3635 case E_TDmode:
3636 regno = FIRST_SSE_REG;
3637 break;
3638 case E_XFmode:
3639 case E_XCmode:
3640 regno = FIRST_FLOAT_REG;
3641 break;
3642 case E_TCmode:
3643 return NULL;
3644 default:
3645 regno = AX_REG;
3646 }
3647
3648 return gen_rtx_REG (mode, regno);
3649 }
3650 else if (POINTER_TYPE_P (valtype))
3651 {
3652 /* Pointers are always returned in word_mode. */
3653 mode = word_mode;
3654 }
3655
3656 ret = construct_container (mode, orig_mode, valtype, 1,
3657 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
3658 x86_64_int_return_registers, 0);
3659
3660 /* For zero sized structures, construct_container returns NULL, but we
3661 need to keep the rest of the compiler happy by returning a meaningful value. */
3662 if (!ret)
3663 ret = gen_rtx_REG (orig_mode, AX_REG);
3664
3665 return ret;
3666 }
3667
3668 static rtx
3669 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
3670 const_tree fntype, const_tree fn, const_tree valtype)
3671 {
3672 unsigned int regno;
3673
3674 /* Floating point return values in %st(0)
3675 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3676 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
3677 && (GET_MODE_SIZE (mode) > 8
3678 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
3679 {
3680 regno = FIRST_FLOAT_REG;
3681 return gen_rtx_REG (orig_mode, regno);
3682 }
3683 else
3684 return function_value_32 (orig_mode, mode, fntype, fn);
3685 }
3686
3687 static rtx
3688 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
3689 const_tree valtype)
3690 {
3691 unsigned int regno = AX_REG;
3692
3693 if (TARGET_SSE)
3694 {
3695 switch (GET_MODE_SIZE (mode))
3696 {
3697 case 16:
3698 if (valtype != NULL_TREE
3699 && !VECTOR_INTEGER_TYPE_P (valtype)
3700 && !VECTOR_INTEGER_TYPE_P (valtype)
3701 && !INTEGRAL_TYPE_P (valtype)
3702 && !VECTOR_FLOAT_TYPE_P (valtype))
3703 break;
3704 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3705 && !COMPLEX_MODE_P (mode))
3706 regno = FIRST_SSE_REG;
3707 break;
3708 case 8:
3709 case 4:
3710 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
3711 break;
3712 if (mode == SFmode || mode == DFmode)
3713 regno = FIRST_SSE_REG;
3714 break;
3715 default:
3716 break;
3717 }
3718 }
3719 return gen_rtx_REG (orig_mode, regno);
3720 }
3721
3722 static rtx
3723 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
3724 machine_mode orig_mode, machine_mode mode)
3725 {
3726 const_tree fn, fntype;
3727
3728 fn = NULL_TREE;
3729 if (fntype_or_decl && DECL_P (fntype_or_decl))
3730 fn = fntype_or_decl;
3731 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3732
3733 if (ix86_function_type_abi (fntype) == MS_ABI)
3734 {
3735 if (TARGET_64BIT)
3736 return function_value_ms_64 (orig_mode, mode, valtype);
3737 else
3738 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
3739 }
3740 else if (TARGET_64BIT)
3741 return function_value_64 (orig_mode, mode, valtype);
3742 else
3743 return function_value_32 (orig_mode, mode, fntype, fn);
3744 }
3745
3746 static rtx
3747 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
3748 {
3749 machine_mode mode, orig_mode;
3750
3751 orig_mode = TYPE_MODE (valtype);
3752 mode = type_natural_mode (valtype, NULL, true);
3753 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
3754 }
3755
3756 /* Pointer function arguments and return values are promoted to
3757 word_mode for normal functions. */
3758
3759 static machine_mode
3760 ix86_promote_function_mode (const_tree type, machine_mode mode,
3761 int *punsignedp, const_tree fntype,
3762 int for_return)
3763 {
3764 if (cfun->machine->func_type == TYPE_NORMAL
3765 && type != NULL_TREE
3766 && POINTER_TYPE_P (type))
3767 {
3768 *punsignedp = POINTERS_EXTEND_UNSIGNED;
3769 return word_mode;
3770 }
3771 return default_promote_function_mode (type, mode, punsignedp, fntype,
3772 for_return);
3773 }
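/* For illustration: with -mx32, ptr_mode is SImode while word_mode is
   DImode, so a pointer argument or return value of a normal function is
   promoted to DImode and zero-extended (POINTERS_EXTEND_UNSIGNED).  On
   plain -m64 pointers are already word_mode, and interrupt handlers
   (func_type != TYPE_NORMAL) fall through to the default hook.  */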
3774
3775 /* Return true if a structure, union or array with MODE containing FIELD
3776 should be accessed using BLKmode. */
3777
3778 static bool
3779 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
3780 {
3781 /* Union with XFmode must be in BLKmode. */
3782 return (mode == XFmode
3783 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
3784 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
3785 }
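/* For example, `union { long double ld; int i; }' is accessed in BLKmode,
   whereas `struct { long double ld; int i; }' is not; only (qualified)
   unions containing an XFmode member are affected.  */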
3786
3787 rtx
3788 ix86_libcall_value (machine_mode mode)
3789 {
3790 return ix86_function_value_1 (NULL, NULL, mode, mode);
3791 }
3792
3793 /* Return true iff type is returned in memory. */
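/* For illustration: on the 64-bit MS ABI this means aggregates of size
   1, 2, 4 or 8 bytes and 16-byte vectors such as __m128 come back in
   registers, everything else in memory.  On 32-bit targets a 16-byte
   vector needs SSE, a 32-byte one AVX and a 64-byte one AVX512F to be
   returned in a register.  */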
3794
3795 static bool
3796 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3797 {
3798 #ifdef SUBTARGET_RETURN_IN_MEMORY
3799 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
3800 #else
3801 const machine_mode mode = type_natural_mode (type, NULL, true);
3802 HOST_WIDE_INT size;
3803
3804 if (TARGET_64BIT)
3805 {
3806 if (ix86_function_type_abi (fntype) == MS_ABI)
3807 {
3808 size = int_size_in_bytes (type);
3809
3810 /* __m128 is returned in xmm0. */
3811 if ((!type || VECTOR_INTEGER_TYPE_P (type)
3812 || INTEGRAL_TYPE_P (type)
3813 || VECTOR_FLOAT_TYPE_P (type))
3814 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3815 && !COMPLEX_MODE_P (mode)
3816 && (GET_MODE_SIZE (mode) == 16 || size == 16))
3817 return false;
3818
3819 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
3820 return size != 1 && size != 2 && size != 4 && size != 8;
3821 }
3822 else
3823 {
3824 int needed_intregs, needed_sseregs;
3825
3826 return examine_argument (mode, type, 1,
3827 &needed_intregs, &needed_sseregs);
3828 }
3829 }
3830 else
3831 {
3832 size = int_size_in_bytes (type);
3833
3834 /* Intel MCU psABI returns scalars and aggregates no larger than 8
3835 bytes in registers. */
3836 if (TARGET_IAMCU)
3837 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
3838
3839 if (mode == BLKmode)
3840 return true;
3841
3842 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3843 return false;
3844
3845 if (VECTOR_MODE_P (mode) || mode == TImode)
3846 {
3847 /* User-created vectors small enough to fit in EAX. */
3848 if (size < 8)
3849 return false;
3850
3851 /* Unless the ABI prescribes otherwise,
3852 MMX/3dNow values are returned in MM0 if available. */
3853
3854 if (size == 8)
3855 return TARGET_VECT8_RETURNS || !TARGET_MMX;
3856
3857 /* SSE values are returned in XMM0 if available. */
3858 if (size == 16)
3859 return !TARGET_SSE;
3860
3861 /* AVX values are returned in YMM0 if available. */
3862 if (size == 32)
3863 return !TARGET_AVX;
3864
3865 /* AVX512F values are returned in ZMM0 if available. */
3866 if (size == 64)
3867 return !TARGET_AVX512F;
3868 }
3869
3870 if (mode == XFmode)
3871 return false;
3872
3873 if (size > 12)
3874 return true;
3875
3876 /* OImode shouldn't be used directly. */
3877 gcc_assert (mode != OImode);
3878
3879 return false;
3880 }
3881 #endif
3882 }
3883
3884 \f
3885 /* Create the va_list data type. */
3886
3887 static tree
3888 ix86_build_builtin_va_list_64 (void)
3889 {
3890 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3891
3892 record = lang_hooks.types.make_type (RECORD_TYPE);
3893 type_decl = build_decl (BUILTINS_LOCATION,
3894 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3895
3896 f_gpr = build_decl (BUILTINS_LOCATION,
3897 FIELD_DECL, get_identifier ("gp_offset"),
3898 unsigned_type_node);
3899 f_fpr = build_decl (BUILTINS_LOCATION,
3900 FIELD_DECL, get_identifier ("fp_offset"),
3901 unsigned_type_node);
3902 f_ovf = build_decl (BUILTINS_LOCATION,
3903 FIELD_DECL, get_identifier ("overflow_arg_area"),
3904 ptr_type_node);
3905 f_sav = build_decl (BUILTINS_LOCATION,
3906 FIELD_DECL, get_identifier ("reg_save_area"),
3907 ptr_type_node);
3908
3909 va_list_gpr_counter_field = f_gpr;
3910 va_list_fpr_counter_field = f_fpr;
3911
3912 DECL_FIELD_CONTEXT (f_gpr) = record;
3913 DECL_FIELD_CONTEXT (f_fpr) = record;
3914 DECL_FIELD_CONTEXT (f_ovf) = record;
3915 DECL_FIELD_CONTEXT (f_sav) = record;
3916
3917 TYPE_STUB_DECL (record) = type_decl;
3918 TYPE_NAME (record) = type_decl;
3919 TYPE_FIELDS (record) = f_gpr;
3920 DECL_CHAIN (f_gpr) = f_fpr;
3921 DECL_CHAIN (f_fpr) = f_ovf;
3922 DECL_CHAIN (f_ovf) = f_sav;
3923
3924 layout_type (record);
3925
3926 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
3927 NULL_TREE, TYPE_ATTRIBUTES (record));
3928
3929 /* The correct type is an array type of one element. */
3930 return build_array_type (record, build_index_type (size_zero_node));
3931 }
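/* The record built above corresponds to the C-level type required by the
   x86-64 psABI:

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;     -- next free GPR slot in reg_save_area (0..48)
       unsigned int fp_offset;     -- next free SSE slot in reg_save_area (48..176)
       void *overflow_arg_area;    -- next argument passed on the stack
       void *reg_save_area;        -- start of the register save area
     } __va_list_tag;
     typedef __va_list_tag va_list[1];  */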
3932
3933 /* Set up the builtin va_list data type and, for 64-bit, the additional
3934 calling-convention-specific va_list data types. */
3935
3936 static tree
3937 ix86_build_builtin_va_list (void)
3938 {
3939 if (TARGET_64BIT)
3940 {
3941 /* Initialize ABI specific va_list builtin types.
3942
3943 In lto1, we can encounter two va_list types:
3944 - one as a result of the type-merge across TUs, and
3945 - the one constructed here.
3946 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
3947 a type identity check in canonical_va_list_type based on
3948 TYPE_MAIN_VARIANT (which we used to have) will not work.
3949 Instead, we tag each va_list_type_node with its unique attribute, and
3950 look for the attribute in the type identity check in
3951 canonical_va_list_type.
3952
3953 Tagging sysv_va_list_type_node directly with the attribute is
3954 problematic since it's an array of one record, which will degrade into a
3955 pointer to record when used as parameter (see build_va_arg comments for
3956 an example), dropping the attribute in the process. So we tag the
3957 record instead. */
3958
3959 /* For SYSV_ABI we use an array of one record. */
3960 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
3961
3962 /* For MS_ABI we use plain pointer to argument area. */
3963 tree char_ptr_type = build_pointer_type (char_type_node);
3964 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
3965 TYPE_ATTRIBUTES (char_ptr_type));
3966 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
3967
3968 return ((ix86_abi == MS_ABI)
3969 ? ms_va_list_type_node
3970 : sysv_va_list_type_node);
3971 }
3972 else
3973 {
3974 /* For i386 we use plain pointer to argument area. */
3975 return build_pointer_type (char_type_node);
3976 }
3977 }
3978
3979 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3980
3981 static void
3982 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
3983 {
3984 rtx save_area, mem;
3985 alias_set_type set;
3986 int i, max;
3987
3988 /* GPR size of varargs save area. */
3989 if (cfun->va_list_gpr_size)
3990 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
3991 else
3992 ix86_varargs_gpr_size = 0;
3993
3994 /* FPR size of varargs save area. We don't need it if we don't pass
3995 anything in SSE registers. */
3996 if (TARGET_SSE && cfun->va_list_fpr_size)
3997 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
3998 else
3999 ix86_varargs_fpr_size = 0;
4000
4001 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4002 return;
4003
4004 save_area = frame_pointer_rtx;
4005 set = get_varargs_alias_set ();
4006
4007 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4008 if (max > X86_64_REGPARM_MAX)
4009 max = X86_64_REGPARM_MAX;
4010
4011 for (i = cum->regno; i < max; i++)
4012 {
4013 mem = gen_rtx_MEM (word_mode,
4014 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4015 MEM_NOTRAP_P (mem) = 1;
4016 set_mem_alias_set (mem, set);
4017 emit_move_insn (mem,
4018 gen_rtx_REG (word_mode,
4019 x86_64_int_parameter_registers[i]));
4020 }
4021
4022 if (ix86_varargs_fpr_size)
4023 {
4024 machine_mode smode;
4025 rtx_code_label *label;
4026 rtx test;
4027
4028 /* Now emit code to save SSE registers. The AX parameter contains the
4029 number of SSE parameter registers used to call this function, though all we
4030 actually check here is the zero/non-zero status. */
4031
4032 label = gen_label_rtx ();
4033 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4034 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4035 label));
4036
4037 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4038 we used movdqa (i.e. TImode) instead? Perhaps even better would
4039 be if we could determine the real mode of the data, via a hook
4040 into pass_stdarg. Ignore all that for now. */
4041 smode = V4SFmode;
4042 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4043 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4044
4045 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4046 if (max > X86_64_SSE_REGPARM_MAX)
4047 max = X86_64_SSE_REGPARM_MAX;
4048
4049 for (i = cum->sse_regno; i < max; ++i)
4050 {
4051 mem = plus_constant (Pmode, save_area,
4052 i * 16 + ix86_varargs_gpr_size);
4053 mem = gen_rtx_MEM (smode, mem);
4054 MEM_NOTRAP_P (mem) = 1;
4055 set_mem_alias_set (mem, set);
4056 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4057
4058 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4059 }
4060
4061 emit_label (label);
4062 }
4063 }
4064
4065 static void
4066 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4067 {
4068 alias_set_type set = get_varargs_alias_set ();
4069 int i;
4070
4071 /* Reset to zero, as there might be a SysV va_arg used
4072 before. */
4073 ix86_varargs_gpr_size = 0;
4074 ix86_varargs_fpr_size = 0;
4075
4076 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4077 {
4078 rtx reg, mem;
4079
4080 mem = gen_rtx_MEM (Pmode,
4081 plus_constant (Pmode, virtual_incoming_args_rtx,
4082 i * UNITS_PER_WORD));
4083 MEM_NOTRAP_P (mem) = 1;
4084 set_mem_alias_set (mem, set);
4085
4086 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4087 emit_move_insn (mem, reg);
4088 }
4089 }
4090
4091 static void
4092 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4093 const function_arg_info &arg,
4094 int *, int no_rtl)
4095 {
4096 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4097 CUMULATIVE_ARGS next_cum;
4098 tree fntype;
4099
4100 /* This argument doesn't appear to be used anymore. Which is good,
4101 because the old code here didn't suppress rtl generation. */
4102 gcc_assert (!no_rtl);
4103
4104 if (!TARGET_64BIT)
4105 return;
4106
4107 fntype = TREE_TYPE (current_function_decl);
4108
4109 /* For varargs, we do not want to skip the dummy va_dcl argument.
4110 For stdargs, we do want to skip the last named argument. */
4111 next_cum = *cum;
4112 if (stdarg_p (fntype))
4113 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4114
4115 if (cum->call_abi == MS_ABI)
4116 setup_incoming_varargs_ms_64 (&next_cum);
4117 else
4118 setup_incoming_varargs_64 (&next_cum);
4119 }
4120
4121 /* Check whether TYPE is a char * style va_list. */
4122
4123 static bool
4124 is_va_list_char_pointer (tree type)
4125 {
4126 tree canonic;
4127
4128 /* For 32-bit it is always true. */
4129 if (!TARGET_64BIT)
4130 return true;
4131 canonic = ix86_canonical_va_list_type (type);
4132 return (canonic == ms_va_list_type_node
4133 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4134 }
4135
4136 /* Implement va_start. */
4137
4138 static void
4139 ix86_va_start (tree valist, rtx nextarg)
4140 {
4141 HOST_WIDE_INT words, n_gpr, n_fpr;
4142 tree f_gpr, f_fpr, f_ovf, f_sav;
4143 tree gpr, fpr, ovf, sav, t;
4144 tree type;
4145 rtx ovf_rtx;
4146
4147 if (flag_split_stack
4148 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4149 {
4150 unsigned int scratch_regno;
4151
4152 /* When we are splitting the stack, we can't refer to the stack
4153 arguments using internal_arg_pointer, because they may be on
4154 the old stack. The split stack prologue will arrange to
4155 leave a pointer to the old stack arguments in a scratch
4156 register, which we here copy to a pseudo-register. The split
4157 stack prologue can't set the pseudo-register directly because
4158 it (the prologue) runs before any registers have been saved. */
4159
4160 scratch_regno = split_stack_prologue_scratch_regno ();
4161 if (scratch_regno != INVALID_REGNUM)
4162 {
4163 rtx reg;
4164 rtx_insn *seq;
4165
4166 reg = gen_reg_rtx (Pmode);
4167 cfun->machine->split_stack_varargs_pointer = reg;
4168
4169 start_sequence ();
4170 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4171 seq = get_insns ();
4172 end_sequence ();
4173
4174 push_topmost_sequence ();
4175 emit_insn_after (seq, entry_of_function ());
4176 pop_topmost_sequence ();
4177 }
4178 }
4179
4180 /* Only 64-bit targets need something special. */
4181 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4182 {
4183 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4184 std_expand_builtin_va_start (valist, nextarg);
4185 else
4186 {
4187 rtx va_r, next;
4188
4189 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4190 next = expand_binop (ptr_mode, add_optab,
4191 cfun->machine->split_stack_varargs_pointer,
4192 crtl->args.arg_offset_rtx,
4193 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4194 convert_move (va_r, next, 0);
4195 }
4196 return;
4197 }
4198
4199 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4200 f_fpr = DECL_CHAIN (f_gpr);
4201 f_ovf = DECL_CHAIN (f_fpr);
4202 f_sav = DECL_CHAIN (f_ovf);
4203
4204 valist = build_simple_mem_ref (valist);
4205 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4206 /* The following should be folded into the MEM_REF offset. */
4207 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4208 f_gpr, NULL_TREE);
4209 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4210 f_fpr, NULL_TREE);
4211 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4212 f_ovf, NULL_TREE);
4213 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4214 f_sav, NULL_TREE);
4215
4216 /* Count number of gp and fp argument registers used. */
4217 words = crtl->args.info.words;
4218 n_gpr = crtl->args.info.regno;
4219 n_fpr = crtl->args.info.sse_regno;
4220
4221 if (cfun->va_list_gpr_size)
4222 {
4223 type = TREE_TYPE (gpr);
4224 t = build2 (MODIFY_EXPR, type,
4225 gpr, build_int_cst (type, n_gpr * 8));
4226 TREE_SIDE_EFFECTS (t) = 1;
4227 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4228 }
4229
4230 if (TARGET_SSE && cfun->va_list_fpr_size)
4231 {
4232 type = TREE_TYPE (fpr);
4233 t = build2 (MODIFY_EXPR, type, fpr,
4234 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4235 TREE_SIDE_EFFECTS (t) = 1;
4236 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4237 }
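/* Worked example: for `int f (int a, double b, ...)' the named arguments
   consume one GPR and one SSE register, so n_gpr == 1 and n_fpr == 1 and
   va_start stores gp_offset = 8 and fp_offset = 48 + 16 = 64 (the GPR
   save area holds X86_64_REGPARM_MAX == 6 slots of 8 bytes each).  */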
4238
4239 /* Find the overflow area. */
4240 type = TREE_TYPE (ovf);
4241 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4242 ovf_rtx = crtl->args.internal_arg_pointer;
4243 else
4244 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4245 t = make_tree (type, ovf_rtx);
4246 if (words != 0)
4247 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4248
4249 t = build2 (MODIFY_EXPR, type, ovf, t);
4250 TREE_SIDE_EFFECTS (t) = 1;
4251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4252
4253 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4254 {
4255 /* Find the register save area.
4256 The function prologue saves it right above the stack frame. */
4257 type = TREE_TYPE (sav);
4258 t = make_tree (type, frame_pointer_rtx);
4259 if (!ix86_varargs_gpr_size)
4260 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4261
4262 t = build2 (MODIFY_EXPR, type, sav, t);
4263 TREE_SIDE_EFFECTS (t) = 1;
4264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4265 }
4266 }
4267
4268 /* Implement va_arg. */
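/* A rough sketch of the sequence gimplified below for a small integer
   argument (classified INTEGER, one GPR needed):

     if (gp_offset >= 6*8) goto lab_false;
     addr = reg_save_area + gp_offset;
     gp_offset += 8;
     goto lab_over;
   lab_false:
     addr = overflow_arg_area;     (re-aligned first if the type needs it)
     overflow_arg_area += 8;
   lab_over:
     result = *(int *) addr;  */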
4269
4270 static tree
4271 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4272 gimple_seq *post_p)
4273 {
4274 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4275 tree f_gpr, f_fpr, f_ovf, f_sav;
4276 tree gpr, fpr, ovf, sav, t;
4277 int size, rsize;
4278 tree lab_false, lab_over = NULL_TREE;
4279 tree addr, t2;
4280 rtx container;
4281 int indirect_p = 0;
4282 tree ptrtype;
4283 machine_mode nat_mode;
4284 unsigned int arg_boundary;
4285
4286 /* Only 64-bit targets need something special. */
4287 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4288 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4289
4290 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4291 f_fpr = DECL_CHAIN (f_gpr);
4292 f_ovf = DECL_CHAIN (f_fpr);
4293 f_sav = DECL_CHAIN (f_ovf);
4294
4295 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4296 valist, f_gpr, NULL_TREE);
4297
4298 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4299 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4300 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4301
4302 indirect_p = pass_va_arg_by_reference (type);
4303 if (indirect_p)
4304 type = build_pointer_type (type);
4305 size = arg_int_size_in_bytes (type);
4306 rsize = CEIL (size, UNITS_PER_WORD);
4307
4308 nat_mode = type_natural_mode (type, NULL, false);
4309 switch (nat_mode)
4310 {
4311 case E_V8SFmode:
4312 case E_V8SImode:
4313 case E_V32QImode:
4314 case E_V16HImode:
4315 case E_V4DFmode:
4316 case E_V4DImode:
4317 case E_V16SFmode:
4318 case E_V16SImode:
4319 case E_V64QImode:
4320 case E_V32HImode:
4321 case E_V8DFmode:
4322 case E_V8DImode:
4323 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
4324 if (!TARGET_64BIT_MS_ABI)
4325 {
4326 container = NULL;
4327 break;
4328 }
4329 /* FALLTHRU */
4330
4331 default:
4332 container = construct_container (nat_mode, TYPE_MODE (type),
4333 type, 0, X86_64_REGPARM_MAX,
4334 X86_64_SSE_REGPARM_MAX, intreg,
4335 0);
4336 break;
4337 }
4338
4339 /* Pull the value out of the saved registers. */
4340
4341 addr = create_tmp_var (ptr_type_node, "addr");
4342
4343 if (container)
4344 {
4345 int needed_intregs, needed_sseregs;
4346 bool need_temp;
4347 tree int_addr, sse_addr;
4348
4349 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4350 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4351
4352 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4353
4354 need_temp = (!REG_P (container)
4355 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4356 || TYPE_ALIGN (type) > 128));
4357
4358 /* In case we are passing a structure, verify that it is a consecutive block
4359 in the register save area. If not, we need to do moves. */
4360 if (!need_temp && !REG_P (container))
4361 {
4362 /* Verify that all registers are strictly consecutive */
4363 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4364 {
4365 int i;
4366
4367 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4368 {
4369 rtx slot = XVECEXP (container, 0, i);
4370 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4371 || INTVAL (XEXP (slot, 1)) != i * 16)
4372 need_temp = true;
4373 }
4374 }
4375 else
4376 {
4377 int i;
4378
4379 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4380 {
4381 rtx slot = XVECEXP (container, 0, i);
4382 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4383 || INTVAL (XEXP (slot, 1)) != i * 8)
4384 need_temp = true;
4385 }
4386 }
4387 }
4388 if (!need_temp)
4389 {
4390 int_addr = addr;
4391 sse_addr = addr;
4392 }
4393 else
4394 {
4395 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4396 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4397 }
4398
4399 /* First ensure that we fit completely in registers. */
4400 if (needed_intregs)
4401 {
4402 t = build_int_cst (TREE_TYPE (gpr),
4403 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4404 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4405 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4406 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4407 gimplify_and_add (t, pre_p);
4408 }
4409 if (needed_sseregs)
4410 {
4411 t = build_int_cst (TREE_TYPE (fpr),
4412 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4413 + X86_64_REGPARM_MAX * 8);
4414 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4415 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4416 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4417 gimplify_and_add (t, pre_p);
4418 }
4419
4420 /* Compute index to start of area used for integer regs. */
4421 if (needed_intregs)
4422 {
4423 /* int_addr = gpr + sav; */
4424 t = fold_build_pointer_plus (sav, gpr);
4425 gimplify_assign (int_addr, t, pre_p);
4426 }
4427 if (needed_sseregs)
4428 {
4429 /* sse_addr = fpr + sav; */
4430 t = fold_build_pointer_plus (sav, fpr);
4431 gimplify_assign (sse_addr, t, pre_p);
4432 }
4433 if (need_temp)
4434 {
4435 int i, prev_size = 0;
4436 tree temp = create_tmp_var (type, "va_arg_tmp");
4437
4438 /* addr = &temp; */
4439 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4440 gimplify_assign (addr, t, pre_p);
4441
4442 for (i = 0; i < XVECLEN (container, 0); i++)
4443 {
4444 rtx slot = XVECEXP (container, 0, i);
4445 rtx reg = XEXP (slot, 0);
4446 machine_mode mode = GET_MODE (reg);
4447 tree piece_type;
4448 tree addr_type;
4449 tree daddr_type;
4450 tree src_addr, src;
4451 int src_offset;
4452 tree dest_addr, dest;
4453 int cur_size = GET_MODE_SIZE (mode);
4454
4455 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4456 prev_size = INTVAL (XEXP (slot, 1));
4457 if (prev_size + cur_size > size)
4458 {
4459 cur_size = size - prev_size;
4460 unsigned int nbits = cur_size * BITS_PER_UNIT;
4461 if (!int_mode_for_size (nbits, 1).exists (&mode))
4462 mode = QImode;
4463 }
4464 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4465 if (mode == GET_MODE (reg))
4466 addr_type = build_pointer_type (piece_type);
4467 else
4468 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4469 true);
4470 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4471 true);
4472
4473 if (SSE_REGNO_P (REGNO (reg)))
4474 {
4475 src_addr = sse_addr;
4476 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4477 }
4478 else
4479 {
4480 src_addr = int_addr;
4481 src_offset = REGNO (reg) * 8;
4482 }
4483 src_addr = fold_convert (addr_type, src_addr);
4484 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4485
4486 dest_addr = fold_convert (daddr_type, addr);
4487 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4488 if (cur_size == GET_MODE_SIZE (mode))
4489 {
4490 src = build_va_arg_indirect_ref (src_addr);
4491 dest = build_va_arg_indirect_ref (dest_addr);
4492
4493 gimplify_assign (dest, src, pre_p);
4494 }
4495 else
4496 {
4497 tree copy
4498 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4499 3, dest_addr, src_addr,
4500 size_int (cur_size));
4501 gimplify_and_add (copy, pre_p);
4502 }
4503 prev_size += cur_size;
4504 }
4505 }
4506
4507 if (needed_intregs)
4508 {
4509 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4510 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4511 gimplify_assign (gpr, t, pre_p);
4512 }
4513
4514 if (needed_sseregs)
4515 {
4516 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4517 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4518 gimplify_assign (unshare_expr (fpr), t, pre_p);
4519 }
4520
4521 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4522
4523 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4524 }
4525
4526 /* ... otherwise out of the overflow area. */
4527
4528 /* When we align a parameter on the stack for the caller, if its
4529 alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4530 aligned to MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
4531 here with the caller. */
4532 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4533 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4534 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4535
4536 /* Care for on-stack alignment if needed. */
4537 if (arg_boundary <= 64 || size == 0)
4538 t = ovf;
4539 else
4540 {
4541 HOST_WIDE_INT align = arg_boundary / 8;
4542 t = fold_build_pointer_plus_hwi (ovf, align - 1);
4543 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4544 build_int_cst (TREE_TYPE (t), -align));
4545 }
4546
4547 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4548 gimplify_assign (addr, t, pre_p);
4549
4550 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
4551 gimplify_assign (unshare_expr (ovf), t, pre_p);
4552
4553 if (container)
4554 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
4555
4556 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
4557 addr = fold_convert (ptrtype, addr);
4558
4559 if (indirect_p)
4560 addr = build_va_arg_indirect_ref (addr);
4561 return build_va_arg_indirect_ref (addr);
4562 }
4563 \f
4564 /* Return true if OPNUM's MEM should be matched
4565 in movabs* patterns. */
4566
4567 bool
4568 ix86_check_movabs (rtx insn, int opnum)
4569 {
4570 rtx set, mem;
4571
4572 set = PATTERN (insn);
4573 if (GET_CODE (set) == PARALLEL)
4574 set = XVECEXP (set, 0, 0);
4575 gcc_assert (GET_CODE (set) == SET);
4576 mem = XEXP (set, opnum);
4577 while (SUBREG_P (mem))
4578 mem = SUBREG_REG (mem);
4579 gcc_assert (MEM_P (mem));
4580 return volatile_ok || !MEM_VOLATILE_P (mem);
4581 }
4582
4583 /* Return false if INSN contains a MEM with a non-default address space. */
4584 bool
4585 ix86_check_no_addr_space (rtx insn)
4586 {
4587 subrtx_var_iterator::array_type array;
4588 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
4589 {
4590 rtx x = *iter;
4591 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
4592 return false;
4593 }
4594 return true;
4595 }
4596 \f
4597 /* Initialize the table of extra 80387 mathematical constants. */
4598
4599 static void
4600 init_ext_80387_constants (void)
4601 {
4602 static const char * cst[5] =
4603 {
4604 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4605 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4606 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4607 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4608 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4609 };
4610 int i;
4611
4612 for (i = 0; i < 5; i++)
4613 {
4614 real_from_string (&ext_80387_constants_table[i], cst[i]);
4615 /* Ensure each constant is rounded to XFmode precision. */
4616 real_convert (&ext_80387_constants_table[i],
4617 XFmode, &ext_80387_constants_table[i]);
4618 }
4619
4620 ext_80387_constants_init = 1;
4621 }
4622
4623 /* Return non-zero if the constant is something that
4624 can be loaded with a special instruction. */
4625
4626 int
4627 standard_80387_constant_p (rtx x)
4628 {
4629 machine_mode mode = GET_MODE (x);
4630
4631 const REAL_VALUE_TYPE *r;
4632
4633 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
4634 return -1;
4635
4636 if (x == CONST0_RTX (mode))
4637 return 1;
4638 if (x == CONST1_RTX (mode))
4639 return 2;
4640
4641 r = CONST_DOUBLE_REAL_VALUE (x);
4642
4643 /* For XFmode constants, try to find a special 80387 instruction when
4644 optimizing for size or on those CPUs that benefit from them. */
4645 if (mode == XFmode
4646 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
4647 {
4648 int i;
4649
4650 if (! ext_80387_constants_init)
4651 init_ext_80387_constants ();
4652
4653 for (i = 0; i < 5; i++)
4654 if (real_identical (r, &ext_80387_constants_table[i]))
4655 return i + 3;
4656 }
4657
4658 /* A load of the constant -0.0 or -1.0 will be split into an
4659 fldz;fchs or fld1;fchs sequence. */
4660 if (real_isnegzero (r))
4661 return 8;
4662 if (real_identical (r, &dconstm1))
4663 return 9;
4664
4665 return 0;
4666 }
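/* For example, 1.0 in XFmode yields 2 (fld1), pi yields 7 (fldpi) when the
   extended constants are enabled, and 2.0 yields 0 because it has no
   dedicated load instruction.  */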
4667
4668 /* Return the opcode of the special instruction to be used to load
4669 the constant X. */
4670
4671 const char *
4672 standard_80387_constant_opcode (rtx x)
4673 {
4674 switch (standard_80387_constant_p (x))
4675 {
4676 case 1:
4677 return "fldz";
4678 case 2:
4679 return "fld1";
4680 case 3:
4681 return "fldlg2";
4682 case 4:
4683 return "fldln2";
4684 case 5:
4685 return "fldl2e";
4686 case 6:
4687 return "fldl2t";
4688 case 7:
4689 return "fldpi";
4690 case 8:
4691 case 9:
4692 return "#";
4693 default:
4694 gcc_unreachable ();
4695 }
4696 }
4697
4698 /* Return the CONST_DOUBLE representing the 80387 constant that is
4699 loaded by the specified special instruction. The argument IDX
4700 matches the return value from standard_80387_constant_p. */
4701
4702 rtx
4703 standard_80387_constant_rtx (int idx)
4704 {
4705 int i;
4706
4707 if (! ext_80387_constants_init)
4708 init_ext_80387_constants ();
4709
4710 switch (idx)
4711 {
4712 case 3:
4713 case 4:
4714 case 5:
4715 case 6:
4716 case 7:
4717 i = idx - 3;
4718 break;
4719
4720 default:
4721 gcc_unreachable ();
4722 }
4723
4724 return const_double_from_real_value (ext_80387_constants_table[i],
4725 XFmode);
4726 }
4727
4728 /* Return 1 if X is all bits 0, and 2 if X is all bits 1,
4729 in a supported SSE/AVX vector mode. */
4730
4731 int
4732 standard_sse_constant_p (rtx x, machine_mode pred_mode)
4733 {
4734 machine_mode mode;
4735
4736 if (!TARGET_SSE)
4737 return 0;
4738
4739 mode = GET_MODE (x);
4740
4741 if (x == const0_rtx || const0_operand (x, mode))
4742 return 1;
4743
4744 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4745 {
4746 /* VOIDmode integer constant, get mode from the predicate. */
4747 if (mode == VOIDmode)
4748 mode = pred_mode;
4749
4750 switch (GET_MODE_SIZE (mode))
4751 {
4752 case 64:
4753 if (TARGET_AVX512F)
4754 return 2;
4755 break;
4756 case 32:
4757 if (TARGET_AVX2)
4758 return 2;
4759 break;
4760 case 16:
4761 if (TARGET_SSE2)
4762 return 2;
4763 break;
4764 case 0:
4765 /* VOIDmode */
4766 gcc_unreachable ();
4767 default:
4768 break;
4769 }
4770 }
4771
4772 return 0;
4773 }
4774
4775 /* Return the opcode of the special instruction to be used to load
4776 the constant operands[1] into operands[0]. */
4777
4778 const char *
4779 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
4780 {
4781 machine_mode mode;
4782 rtx x = operands[1];
4783
4784 gcc_assert (TARGET_SSE);
4785
4786 mode = GET_MODE (x);
4787
4788 if (x == const0_rtx || const0_operand (x, mode))
4789 {
4790 switch (get_attr_mode (insn))
4791 {
4792 case MODE_TI:
4793 if (!EXT_REX_SSE_REG_P (operands[0]))
4794 return "%vpxor\t%0, %d0";
4795 /* FALLTHRU */
4796 case MODE_XI:
4797 case MODE_OI:
4798 if (EXT_REX_SSE_REG_P (operands[0]))
4799 return (TARGET_AVX512VL
4800 ? "vpxord\t%x0, %x0, %x0"
4801 : "vpxord\t%g0, %g0, %g0");
4802 return "vpxor\t%x0, %x0, %x0";
4803
4804 case MODE_V2DF:
4805 if (!EXT_REX_SSE_REG_P (operands[0]))
4806 return "%vxorpd\t%0, %d0";
4807 /* FALLTHRU */
4808 case MODE_V8DF:
4809 case MODE_V4DF:
4810 if (!EXT_REX_SSE_REG_P (operands[0]))
4811 return "vxorpd\t%x0, %x0, %x0";
4812 else if (TARGET_AVX512DQ)
4813 return (TARGET_AVX512VL
4814 ? "vxorpd\t%x0, %x0, %x0"
4815 : "vxorpd\t%g0, %g0, %g0");
4816 else
4817 return (TARGET_AVX512VL
4818 ? "vpxorq\t%x0, %x0, %x0"
4819 : "vpxorq\t%g0, %g0, %g0");
4820
4821 case MODE_V4SF:
4822 if (!EXT_REX_SSE_REG_P (operands[0]))
4823 return "%vxorps\t%0, %d0";
4824 /* FALLTHRU */
4825 case MODE_V16SF:
4826 case MODE_V8SF:
4827 if (!EXT_REX_SSE_REG_P (operands[0]))
4828 return "vxorps\t%x0, %x0, %x0";
4829 else if (TARGET_AVX512DQ)
4830 return (TARGET_AVX512VL
4831 ? "vxorps\t%x0, %x0, %x0"
4832 : "vxorps\t%g0, %g0, %g0");
4833 else
4834 return (TARGET_AVX512VL
4835 ? "vpxord\t%x0, %x0, %x0"
4836 : "vpxord\t%g0, %g0, %g0");
4837
4838 default:
4839 gcc_unreachable ();
4840 }
4841 }
4842 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4843 {
4844 enum attr_mode insn_mode = get_attr_mode (insn);
4845
4846 switch (insn_mode)
4847 {
4848 case MODE_XI:
4849 case MODE_V8DF:
4850 case MODE_V16SF:
4851 gcc_assert (TARGET_AVX512F);
4852 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4853
4854 case MODE_OI:
4855 case MODE_V4DF:
4856 case MODE_V8SF:
4857 gcc_assert (TARGET_AVX2);
4858 /* FALLTHRU */
4859 case MODE_TI:
4860 case MODE_V2DF:
4861 case MODE_V4SF:
4862 gcc_assert (TARGET_SSE2);
4863 if (!EXT_REX_SSE_REG_P (operands[0]))
4864 return (TARGET_AVX
4865 ? "vpcmpeqd\t%0, %0, %0"
4866 : "pcmpeqd\t%0, %0");
4867 else if (TARGET_AVX512VL)
4868 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
4869 else
4870 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4871
4872 default:
4873 gcc_unreachable ();
4874 }
4875 }
4876
4877 gcc_unreachable ();
4878 }
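/* For example, an all-zeros V4SF constant is loaded with
   `xorps %xmm0, %xmm0' (or `vxorps %xmm0, %xmm0, %xmm0' under AVX) and an
   all-ones vector with `pcmpeqd %xmm0, %xmm0'; neither needs a constant
   pool entry.  */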
4879
4880 /* Returns true if INSN can be transformed from a memory load
4881 to a supported FP constant load. */
4882
4883 bool
4884 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
4885 {
4886 rtx src = find_constant_src (insn);
4887
4888 gcc_assert (REG_P (dst));
4889
4890 if (src == NULL
4891 || (SSE_REGNO_P (REGNO (dst))
4892 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
4893 || (STACK_REGNO_P (REGNO (dst))
4894 && standard_80387_constant_p (src) < 1))
4895 return false;
4896
4897 return true;
4898 }
4899
4900 /* Returns true if OP contains a symbol reference. */
4901
4902 bool
4903 symbolic_reference_mentioned_p (rtx op)
4904 {
4905 const char *fmt;
4906 int i;
4907
4908 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4909 return true;
4910
4911 fmt = GET_RTX_FORMAT (GET_CODE (op));
4912 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4913 {
4914 if (fmt[i] == 'E')
4915 {
4916 int j;
4917
4918 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4919 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4920 return true;
4921 }
4922
4923 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4924 return true;
4925 }
4926
4927 return false;
4928 }
4929
4930 /* Return true if it is appropriate to emit `ret' instructions in the
4931 body of a function. Do this only if the epilogue is simple, needing a
4932 couple of insns. Prior to reloading, we can't tell how many registers
4933 must be saved, so return false then. Return false if there is no frame
4934 marker to de-allocate. */
4935
4936 bool
4937 ix86_can_use_return_insn_p (void)
4938 {
4939 if (ix86_function_naked (current_function_decl))
4940 return false;
4941
4942 /* Don't use `ret' instruction in interrupt handler. */
4943 if (! reload_completed
4944 || frame_pointer_needed
4945 || cfun->machine->func_type != TYPE_NORMAL)
4946 return 0;
4947
4948 /* Don't allow more than 32k pop, since that's all we can do
4949 with one instruction. */
4950 if (crtl->args.pops_args && crtl->args.size >= 32768)
4951 return 0;
4952
4953 struct ix86_frame &frame = cfun->machine->frame;
4954 return (frame.stack_pointer_offset == UNITS_PER_WORD
4955 && (frame.nregs + frame.nsseregs) == 0);
4956 }
4957 \f
4958 /* Return the stack frame size. get_frame_size () returns the stack slots
4959 used during compilation, which may be optimized out later. If a stack
4960 frame is needed, stack_frame_required should be true. */
4961
4962 static HOST_WIDE_INT
4963 ix86_get_frame_size (void)
4964 {
4965 if (cfun->machine->stack_frame_required)
4966 return get_frame_size ();
4967 else
4968 return 0;
4969 }
4970
4971 /* Value should be nonzero if functions must have frame pointers.
4972 Zero means the frame pointer need not be set up (and parms may
4973 be accessed via the stack pointer) in functions that seem suitable. */
4974
4975 static bool
4976 ix86_frame_pointer_required (void)
4977 {
4978 /* If we accessed previous frames, then the generated code expects
4979 to be able to access the saved ebp value in our frame. */
4980 if (cfun->machine->accesses_prev_frame)
4981 return true;
4982
4983 /* Several x86 OSes need a frame pointer for other reasons,
4984 usually pertaining to setjmp. */
4985 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4986 return true;
4987
4988 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
4989 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
4990 return true;
4991
4992 /* Win64 SEH: very large frames need a frame pointer, as the maximum
4993 stack allocation is 4GB. */
4994 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
4995 return true;
4996
4997 /* SSE saves require frame-pointer when stack is misaligned. */
4998 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
4999 return true;
5000
5001 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5002 turns off the frame pointer by default. Turn it back on now if
5003 we've not got a leaf function. */
5004 if (TARGET_OMIT_LEAF_FRAME_POINTER
5005 && (!crtl->is_leaf
5006 || ix86_current_function_calls_tls_descriptor))
5007 return true;
5008
5009 if (crtl->profile && !flag_fentry)
5010 return true;
5011
5012 return false;
5013 }
5014
5015 /* Record that the current function accesses previous call frames. */
5016
5017 void
5018 ix86_setup_frame_addresses (void)
5019 {
5020 cfun->machine->accesses_prev_frame = 1;
5021 }
5022 \f
5023 #ifndef USE_HIDDEN_LINKONCE
5024 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5025 # define USE_HIDDEN_LINKONCE 1
5026 # else
5027 # define USE_HIDDEN_LINKONCE 0
5028 # endif
5029 #endif
5030
5031 /* Label count for call and return thunks. It is used to make unique
5032 labels in call and return thunks. */
5033 static int indirectlabelno;
5034
5035 /* True if call thunk function is needed. */
5036 static bool indirect_thunk_needed = false;
5037
5038 /* Bit masks of integer registers, which contain branch target, used
5039 by call thunk functions. */
5040 static int indirect_thunks_used;
5041
5042 /* True if return thunk function is needed. */
5043 static bool indirect_return_needed = false;
5044
5045 /* True if return thunk function via CX is needed. */
5046 static bool indirect_return_via_cx;
5047
5048 #ifndef INDIRECT_LABEL
5049 # define INDIRECT_LABEL "LIND"
5050 #endif
5051
5052 /* Indicate what prefix is needed for an indirect branch. */
5053 enum indirect_thunk_prefix
5054 {
5055 indirect_thunk_prefix_none,
5056 indirect_thunk_prefix_nt
5057 };
5058
5059 /* Return the prefix needed for an indirect branch INSN. */
5060
5061 enum indirect_thunk_prefix
5062 indirect_thunk_need_prefix (rtx_insn *insn)
5063 {
5064 enum indirect_thunk_prefix need_prefix;
5065 if ((cfun->machine->indirect_branch_type
5066 == indirect_branch_thunk_extern)
5067 && ix86_notrack_prefixed_insn_p (insn))
5068 {
5069 /* NOTRACK prefix is only used with external thunk so that it
5070 can be properly updated to support CET at run-time. */
5071 need_prefix = indirect_thunk_prefix_nt;
5072 }
5073 else
5074 need_prefix = indirect_thunk_prefix_none;
5075 return need_prefix;
5076 }
5077
5078 /* Fills in the label name that should be used for the indirect thunk. */
5079
5080 static void
5081 indirect_thunk_name (char name[32], unsigned int regno,
5082 enum indirect_thunk_prefix need_prefix,
5083 bool ret_p)
5084 {
5085 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5086 gcc_unreachable ();
5087
5088 if (USE_HIDDEN_LINKONCE)
5089 {
5090 const char *prefix;
5091
5092 if (need_prefix == indirect_thunk_prefix_nt
5093 && regno != INVALID_REGNUM)
5094 {
5095 /* NOTRACK prefix is only used with external thunk via
5096 register so that NOTRACK prefix can be added to indirect
5097 branch via register to support CET at run-time. */
5098 prefix = "_nt";
5099 }
5100 else
5101 prefix = "";
5102
5103 const char *ret = ret_p ? "return" : "indirect";
5104
5105 if (regno != INVALID_REGNUM)
5106 {
5107 const char *reg_prefix;
5108 if (LEGACY_INT_REGNO_P (regno))
5109 reg_prefix = TARGET_64BIT ? "r" : "e";
5110 else
5111 reg_prefix = "";
5112 sprintf (name, "__x86_%s_thunk%s_%s%s",
5113 ret, prefix, reg_prefix, reg_names[regno]);
5114 }
5115 else
5116 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5117 }
5118 else
5119 {
5120 if (regno != INVALID_REGNUM)
5121 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5122 else
5123 {
5124 if (ret_p)
5125 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5126 else
5127 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
5128 }
5129 }
5130 }
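/* Examples of the names produced with USE_HIDDEN_LINKONCE:

     __x86_indirect_thunk          function address on the stack
     __x86_indirect_thunk_rax      function address in %rax (64-bit)
     __x86_indirect_thunk_nt_eax   NOTRACK external thunk via %eax (32-bit)
     __x86_return_thunk            thunk used for function return

   These are the thunks referenced by -mindirect-branch=thunk (retpolines)
   and -mfunction-return=thunk.  */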
5131
5132 /* Output a call and return thunk for indirect branch. If REGNO != INVALID_REGNUM,
5133 the function address is in REGNO and the call and return thunk looks like:
5134
5135 call L2
5136 L1:
5137 pause
5138 lfence
5139 jmp L1
5140 L2:
5141 mov %REG, (%sp)
5142 ret
5143
5144 Otherwise, the function address is on the top of stack and the
5145 call and return thunk looks like:
5146
5147 call L2
5148 L1:
5149 pause
5150 lfence
5151 jmp L1
5152 L2:
5153 lea WORD_SIZE(%sp), %sp
5154 ret
5155 */
5156
5157 static void
5158 output_indirect_thunk (unsigned int regno)
5159 {
5160 char indirectlabel1[32];
5161 char indirectlabel2[32];
5162
5163 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5164 indirectlabelno++);
5165 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5166 indirectlabelno++);
5167
5168 /* Call */
5169 fputs ("\tcall\t", asm_out_file);
5170 assemble_name_raw (asm_out_file, indirectlabel2);
5171 fputc ('\n', asm_out_file);
5172
5173 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5174
5175 /* AMD and Intel CPUs each prefer a different instruction as the loop filler.
5176 Using both pause + lfence is a compromise solution. */
5177 fprintf (asm_out_file, "\tpause\n\tlfence\n");
5178
5179 /* Jump. */
5180 fputs ("\tjmp\t", asm_out_file);
5181 assemble_name_raw (asm_out_file, indirectlabel1);
5182 fputc ('\n', asm_out_file);
5183
5184 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5185
5186 /* The above call insn pushed a word to stack. Adjust CFI info. */
5187 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5188 {
5189 if (! dwarf2out_do_cfi_asm ())
5190 {
5191 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5192 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5193 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5194 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5195 }
5196 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5197 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5198 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5199 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5200 dwarf2out_emit_cfi (xcfi);
5201 }
5202
5203 if (regno != INVALID_REGNUM)
5204 {
5205 /* MOV. */
5206 rtx xops[2];
5207 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5208 xops[1] = gen_rtx_REG (word_mode, regno);
5209 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5210 }
5211 else
5212 {
5213 /* LEA. */
5214 rtx xops[2];
5215 xops[0] = stack_pointer_rtx;
5216 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5217 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5218 }
5219
5220 fputs ("\tret\n", asm_out_file);
5221 }
5222
5223 /* Output a function with a call and return thunk for indirect branch.
5224 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5225 Otherwise, the function address is on the top of stack. Thunk is
5226 used for function return if RET_P is true. */
5227
5228 static void
5229 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5230 unsigned int regno, bool ret_p)
5231 {
5232 char name[32];
5233 tree decl;
5234
5235 /* Create __x86_indirect_thunk. */
5236 indirect_thunk_name (name, regno, need_prefix, ret_p);
5237 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5238 get_identifier (name),
5239 build_function_type_list (void_type_node, NULL_TREE));
5240 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5241 NULL_TREE, void_type_node);
5242 TREE_PUBLIC (decl) = 1;
5243 TREE_STATIC (decl) = 1;
5244 DECL_IGNORED_P (decl) = 1;
5245
5246 #if TARGET_MACHO
5247 if (TARGET_MACHO)
5248 {
5249 switch_to_section (darwin_sections[picbase_thunk_section]);
5250 fputs ("\t.weak_definition\t", asm_out_file);
5251 assemble_name (asm_out_file, name);
5252 fputs ("\n\t.private_extern\t", asm_out_file);
5253 assemble_name (asm_out_file, name);
5254 putc ('\n', asm_out_file);
5255 ASM_OUTPUT_LABEL (asm_out_file, name);
5256 DECL_WEAK (decl) = 1;
5257 }
5258 else
5259 #endif
5260 if (USE_HIDDEN_LINKONCE)
5261 {
5262 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5263
5264 targetm.asm_out.unique_section (decl, 0);
5265 switch_to_section (get_named_section (decl, NULL, 0));
5266
5267 targetm.asm_out.globalize_label (asm_out_file, name);
5268 fputs ("\t.hidden\t", asm_out_file);
5269 assemble_name (asm_out_file, name);
5270 putc ('\n', asm_out_file);
5271 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5272 }
5273 else
5274 {
5275 switch_to_section (text_section);
5276 ASM_OUTPUT_LABEL (asm_out_file, name);
5277 }
5278
5279 DECL_INITIAL (decl) = make_node (BLOCK);
5280 current_function_decl = decl;
5281 allocate_struct_function (decl, false);
5282 init_function_start (decl);
5283 /* We're about to hide the function body from callees of final_* by
5284 emitting it directly; tell them we're a thunk, if they care. */
5285 cfun->is_thunk = true;
5286 first_function_block_is_cold = false;
5287 /* Make sure unwind info is emitted for the thunk if needed. */
5288 final_start_function (emit_barrier (), asm_out_file, 1);
5289
5290 output_indirect_thunk (regno);
5291
5292 final_end_function ();
5293 init_insn_lengths ();
5294 free_after_compilation (cfun);
5295 set_cfun (NULL);
5296 current_function_decl = NULL;
5297 }
5298
5299 static int pic_labels_used;
5300
5301 /* Fills in the label name that should be used for a pc thunk for
5302 the given register. */
5303
5304 static void
5305 get_pc_thunk_name (char name[32], unsigned int regno)
5306 {
5307 gcc_assert (!TARGET_64BIT);
5308
5309 if (USE_HIDDEN_LINKONCE)
5310 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
5311 else
5312 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5313 }
5314
5315
5316 /* This function generates code for -fpic that loads %ebx with
5317 the return address of the caller and then returns. */
5318
5319 static void
5320 ix86_code_end (void)
5321 {
5322 rtx xops[2];
5323 unsigned int regno;
5324
5325 if (indirect_return_needed)
5326 output_indirect_thunk_function (indirect_thunk_prefix_none,
5327 INVALID_REGNUM, true);
5328 if (indirect_return_via_cx)
5329 output_indirect_thunk_function (indirect_thunk_prefix_none,
5330 CX_REG, true);
5331 if (indirect_thunk_needed)
5332 output_indirect_thunk_function (indirect_thunk_prefix_none,
5333 INVALID_REGNUM, false);
5334
5335 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
5336 {
5337 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
5338 if ((indirect_thunks_used & (1 << i)))
5339 output_indirect_thunk_function (indirect_thunk_prefix_none,
5340 regno, false);
5341 }
5342
5343 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
5344 {
5345 char name[32];
5346 tree decl;
5347
5348 if ((indirect_thunks_used & (1 << regno)))
5349 output_indirect_thunk_function (indirect_thunk_prefix_none,
5350 regno, false);
5351
5352 if (!(pic_labels_used & (1 << regno)))
5353 continue;
5354
5355 get_pc_thunk_name (name, regno);
5356
5357 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5358 get_identifier (name),
5359 build_function_type_list (void_type_node, NULL_TREE));
5360 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5361 NULL_TREE, void_type_node);
5362 TREE_PUBLIC (decl) = 1;
5363 TREE_STATIC (decl) = 1;
5364 DECL_IGNORED_P (decl) = 1;
5365
5366 #if TARGET_MACHO
5367 if (TARGET_MACHO)
5368 {
5369 switch_to_section (darwin_sections[picbase_thunk_section]);
5370 fputs ("\t.weak_definition\t", asm_out_file);
5371 assemble_name (asm_out_file, name);
5372 fputs ("\n\t.private_extern\t", asm_out_file);
5373 assemble_name (asm_out_file, name);
5374 putc ('\n', asm_out_file);
5375 ASM_OUTPUT_LABEL (asm_out_file, name);
5376 DECL_WEAK (decl) = 1;
5377 }
5378 else
5379 #endif
5380 if (USE_HIDDEN_LINKONCE)
5381 {
5382 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5383
5384 targetm.asm_out.unique_section (decl, 0);
5385 switch_to_section (get_named_section (decl, NULL, 0));
5386
5387 targetm.asm_out.globalize_label (asm_out_file, name);
5388 fputs ("\t.hidden\t", asm_out_file);
5389 assemble_name (asm_out_file, name);
5390 putc ('\n', asm_out_file);
5391 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5392 }
5393 else
5394 {
5395 switch_to_section (text_section);
5396 ASM_OUTPUT_LABEL (asm_out_file, name);
5397 }
5398
5399 DECL_INITIAL (decl) = make_node (BLOCK);
5400 current_function_decl = decl;
5401 allocate_struct_function (decl, false);
5402 init_function_start (decl);
5403 /* We're about to hide the function body from callees of final_* by
5404 emitting it directly; tell them we're a thunk, if they care. */
5405 cfun->is_thunk = true;
5406 first_function_block_is_cold = false;
5407 /* Make sure unwind info is emitted for the thunk if needed. */
5408 final_start_function (emit_barrier (), asm_out_file, 1);
5409
5410 /* Pad stack IP move with 4 instructions (two NOPs count
5411 as one instruction). */
5412 if (TARGET_PAD_SHORT_FUNCTION)
5413 {
5414 int i = 8;
5415
5416 while (i--)
5417 fputs ("\tnop\n", asm_out_file);
5418 }
5419
5420 xops[0] = gen_rtx_REG (Pmode, regno);
5421 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5422 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
5423 output_asm_insn ("%!ret", NULL);
5424 final_end_function ();
5425 init_insn_lengths ();
5426 free_after_compilation (cfun);
5427 set_cfun (NULL);
5428 current_function_decl = NULL;
5429 }
5430
5431 if (flag_split_stack)
5432 file_end_indicate_split_stack ();
5433 }
5434
5435 /* Emit code for the SET_GOT patterns. */
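/* For the common -fpic case on ia32 this expands to something like

     call    __x86.get_pc_thunk.bx
     addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   where the pc thunk (emitted later by ix86_code_end) is simply
   `movl (%esp), %ebx; ret'.  */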
5436
5437 const char *
5438 output_set_got (rtx dest, rtx label)
5439 {
5440 rtx xops[3];
5441
5442 xops[0] = dest;
5443
5444 if (TARGET_VXWORKS_RTP && flag_pic)
5445 {
5446 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5447 xops[2] = gen_rtx_MEM (Pmode,
5448 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5449 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5450
5451 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5452 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5453 an unadorned address. */
5454 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5455 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5456 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5457 return "";
5458 }
5459
5460 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5461
5462 if (flag_pic)
5463 {
5464 char name[32];
5465 get_pc_thunk_name (name, REGNO (dest));
5466 pic_labels_used |= 1 << REGNO (dest);
5467
5468 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5469 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5470 output_asm_insn ("%!call\t%X2", xops);
5471
5472 #if TARGET_MACHO
5473 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
5474 This is what will be referenced by the Mach-O PIC subsystem. */
5475 if (machopic_should_output_picbase_label () || !label)
5476 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
5477
5478 /* When we are restoring the pic base at the site of a nonlocal label,
5479 and we decided to emit the pic base above, we will still output a
5480 local label used for calculating the correction offset (even though
5481 the offset will be 0 in that case). */
5482 if (label)
5483 targetm.asm_out.internal_label (asm_out_file, "L",
5484 CODE_LABEL_NUMBER (label));
5485 #endif
5486 }
5487 else
5488 {
5489 if (TARGET_MACHO)
5490 /* We don't need a pic base, we're not producing pic. */
5491 gcc_unreachable ();
5492
5493 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5494 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
5495 targetm.asm_out.internal_label (asm_out_file, "L",
5496 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5497 }
5498
5499 if (!TARGET_MACHO)
5500 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
5501
5502 return "";
5503 }
5504
5505 /* Generate a "push" pattern for input ARG. */
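/* On 64-bit targets the RTL produced is, for example,

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))

   i.e. a plain `pushq %rdi'; sub-word register arguments are widened to
   word_mode first so the push is always word sized.  */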
5506
5507 rtx
5508 gen_push (rtx arg)
5509 {
5510 struct machine_function *m = cfun->machine;
5511
5512 if (m->fs.cfa_reg == stack_pointer_rtx)
5513 m->fs.cfa_offset += UNITS_PER_WORD;
5514 m->fs.sp_offset += UNITS_PER_WORD;
5515
5516 if (REG_P (arg) && GET_MODE (arg) != word_mode)
5517 arg = gen_rtx_REG (word_mode, REGNO (arg));
5518
5519 return gen_rtx_SET (gen_rtx_MEM (word_mode,
5520 gen_rtx_PRE_DEC (Pmode,
5521 stack_pointer_rtx)),
5522 arg);
5523 }
5524
5525 /* Generate a "pop" pattern for input ARG. */
5526
5527 rtx
5528 gen_pop (rtx arg)
5529 {
5530 if (REG_P (arg) && GET_MODE (arg) != word_mode)
5531 arg = gen_rtx_REG (word_mode, REGNO (arg));
5532
5533 return gen_rtx_SET (arg,
5534 gen_rtx_MEM (word_mode,
5535 gen_rtx_POST_INC (Pmode,
5536 stack_pointer_rtx)));
5537 }
5538
5539 /* Return >= 0 if there is an unused call-clobbered register available
5540 for the entire function. */
5541
5542 static unsigned int
5543 ix86_select_alt_pic_regnum (void)
5544 {
5545 if (ix86_use_pseudo_pic_reg ())
5546 return INVALID_REGNUM;
5547
5548 if (crtl->is_leaf
5549 && !crtl->profile
5550 && !ix86_current_function_calls_tls_descriptor)
5551 {
5552 int i, drap;
5553 /* Can't use the same register for both PIC and DRAP. */
5554 if (crtl->drap_reg)
5555 drap = REGNO (crtl->drap_reg);
5556 else
5557 drap = -1;
5558 for (i = 2; i >= 0; --i)
5559 if (i != drap && !df_regs_ever_live_p (i))
5560 return i;
5561 }
5562
5563 return INVALID_REGNUM;
5564 }
5565
5566 /* Return true if REGNO is used by the epilogue. */
5567
5568 bool
5569 ix86_epilogue_uses (int regno)
5570 {
5571 /* If there are no caller-saved registers, we preserve all registers,
5572 except for MMX and x87 registers which aren't supported when saving
5573 and restoring registers. Don't explicitly save SP register since
5574 it is always preserved. */
5575 return (epilogue_completed
5576 && cfun->machine->no_caller_saved_registers
5577 && !fixed_regs[regno]
5578 && !STACK_REGNO_P (regno)
5579 && !MMX_REGNO_P (regno));
5580 }
5581
5582 /* Return nonzero if register REGNO can be used as a scratch register
5583 in peephole2. */
5584
5585 static bool
5586 ix86_hard_regno_scratch_ok (unsigned int regno)
5587 {
5588 /* If there are no caller-saved registers, we can't use any register
5589 as a scratch register after epilogue and use REGNO as scratch
5590 register only if it has been used before to avoid saving and
5591 restoring it. */
5592 return (!cfun->machine->no_caller_saved_registers
5593 || (!epilogue_completed
5594 && df_regs_ever_live_p (regno)));
5595 }
5596
5597 /* Return TRUE if we need to save REGNO. */
5598
5599 bool
5600 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
5601 {
5602 /* If there are no caller-saved registers, we preserve all registers,
5603 except for MMX and x87 registers which aren't supported when saving
5604 and restoring registers. Don't explicitly save SP register since
5605 it is always preserved. */
5606 if (cfun->machine->no_caller_saved_registers)
5607 {
5608 /* Don't preserve registers used for function return value. */
5609 rtx reg = crtl->return_rtx;
5610 if (reg)
5611 {
5612 unsigned int i = REGNO (reg);
5613 unsigned int nregs = REG_NREGS (reg);
5614 while (nregs-- > 0)
5615 if ((i + nregs) == regno)
5616 return false;
5617 }
5618
5619 return (df_regs_ever_live_p (regno)
5620 && !fixed_regs[regno]
5621 && !STACK_REGNO_P (regno)
5622 && !MMX_REGNO_P (regno)
5623 && (regno != HARD_FRAME_POINTER_REGNUM
5624 || !frame_pointer_needed));
5625 }
5626
5627 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
5628 && pic_offset_table_rtx)
5629 {
5630 if (ix86_use_pseudo_pic_reg ())
5631 {
5632 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
5633 _mcount in prologue. */
5634 if (!TARGET_64BIT && flag_pic && crtl->profile)
5635 return true;
5636 }
5637 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5638 || crtl->profile
5639 || crtl->calls_eh_return
5640 || crtl->uses_const_pool
5641 || cfun->has_nonlocal_label)
5642 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
5643 }
5644
5645 if (crtl->calls_eh_return && maybe_eh_return)
5646 {
5647 unsigned i;
5648 for (i = 0; ; i++)
5649 {
5650 unsigned test = EH_RETURN_DATA_REGNO (i);
5651 if (test == INVALID_REGNUM)
5652 break;
5653 if (test == regno)
5654 return true;
5655 }
5656 }
5657
5658 if (ignore_outlined && cfun->machine->call_ms2sysv)
5659 {
5660 unsigned count = cfun->machine->call_ms2sysv_extra_regs
5661 + xlogue_layout::MIN_REGS;
5662 if (xlogue_layout::is_stub_managed_reg (regno, count))
5663 return false;
5664 }
5665
5666 if (crtl->drap_reg
5667 && regno == REGNO (crtl->drap_reg)
5668 && !cfun->machine->no_drap_save_restore)
5669 return true;
5670
5671 return (df_regs_ever_live_p (regno)
5672 && !call_used_regs[regno]
5673 && !fixed_regs[regno]
5674 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5675 }
5676
5677 /* Return number of saved general purpose registers. */
5678
5679 static int
5680 ix86_nsaved_regs (void)
5681 {
5682 int nregs = 0;
5683 int regno;
5684
5685 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5686 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5687 nregs ++;
5688 return nregs;
5689 }
5690
5691 /* Return number of saved SSE registers. */
5692
5693 static int
5694 ix86_nsaved_sseregs (void)
5695 {
5696 int nregs = 0;
5697 int regno;
5698
5699 if (!TARGET_64BIT_MS_ABI)
5700 return 0;
5701 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5702 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5703 nregs ++;
5704 return nregs;
5705 }
5706
5707 /* Given FROM and TO register numbers, say whether this elimination is
5708 allowed. If stack alignment is needed, we can only replace argument
5709 pointer with hard frame pointer, or replace frame pointer with stack
5710 pointer. Otherwise, frame pointer elimination is automatically
5711 handled and all other eliminations are valid. */
5712
5713 static bool
5714 ix86_can_eliminate (const int from, const int to)
5715 {
5716 if (stack_realign_fp)
5717 return ((from == ARG_POINTER_REGNUM
5718 && to == HARD_FRAME_POINTER_REGNUM)
5719 || (from == FRAME_POINTER_REGNUM
5720 && to == STACK_POINTER_REGNUM));
5721 else
5722 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
5723 }
5724
5725 /* Return the offset between two registers, one to be eliminated, and the other
5726 its replacement, at the start of a routine. */
5727
5728 HOST_WIDE_INT
5729 ix86_initial_elimination_offset (int from, int to)
5730 {
5731 struct ix86_frame &frame = cfun->machine->frame;
5732
5733 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5734 return frame.hard_frame_pointer_offset;
5735 else if (from == FRAME_POINTER_REGNUM
5736 && to == HARD_FRAME_POINTER_REGNUM)
5737 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5738 else
5739 {
5740 gcc_assert (to == STACK_POINTER_REGNUM);
5741
5742 if (from == ARG_POINTER_REGNUM)
5743 return frame.stack_pointer_offset;
5744
5745 gcc_assert (from == FRAME_POINTER_REGNUM);
5746 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5747 }
5748 }
5749
5750 /* Emit a warning for unsupported msabi-to-sysv prologues/epilogues. */
5751 void warn_once_call_ms2sysv_xlogues (const char *feature)
5752 {
5753 static bool warned_once = false;
5754 if (!warned_once)
5755 {
5756 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
5757 feature);
5758 warned_once = true;
5759 }
5760 }
5761
5762 /* Return the probing interval for -fstack-clash-protection. */
5763
5764 static HOST_WIDE_INT
5765 get_probe_interval (void)
5766 {
5767 if (flag_stack_clash_protection)
5768 return (HOST_WIDE_INT_1U
5769 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
5770 else
5771 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
5772 }
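
/* For example, assuming the default value of 12 for
   --param stack-clash-protection-probe-interval (and likewise for the
   target's default STACK_CHECK_PROBE_INTERVAL_EXP), the interval returned
   above works out to HOST_WIDE_INT_1U << 12 == 4096 bytes, i.e. one probe
   per 4 KiB page.  */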
5773
5774 /* When using -fsplit-stack, the allocation routines set a field in
5775 the TCB to the bottom of the stack plus this much space, measured
5776 in bytes. */
5777
5778 #define SPLIT_STACK_AVAILABLE 256
5779
5780 /* Fill the ix86_frame structure describing the frame of the current function. */
5781
5782 static void
5783 ix86_compute_frame_layout (void)
5784 {
5785 struct ix86_frame *frame = &cfun->machine->frame;
5786 struct machine_function *m = cfun->machine;
5787 unsigned HOST_WIDE_INT stack_alignment_needed;
5788 HOST_WIDE_INT offset;
5789 unsigned HOST_WIDE_INT preferred_alignment;
5790 HOST_WIDE_INT size = ix86_get_frame_size ();
5791 HOST_WIDE_INT to_allocate;
5792
5793 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
5794 * ms_abi functions that call a sysv function. We now need to prune away
5795 * cases where it should be disabled. */
5796 if (TARGET_64BIT && m->call_ms2sysv)
5797 {
5798 gcc_assert (TARGET_64BIT_MS_ABI);
5799 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
5800 gcc_assert (!TARGET_SEH);
5801 gcc_assert (TARGET_SSE);
5802 gcc_assert (!ix86_using_red_zone ());
5803
5804 if (crtl->calls_eh_return)
5805 {
5806 gcc_assert (!reload_completed);
5807 m->call_ms2sysv = false;
5808 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
5809 }
5810
5811 else if (ix86_static_chain_on_stack)
5812 {
5813 gcc_assert (!reload_completed);
5814 m->call_ms2sysv = false;
5815 warn_once_call_ms2sysv_xlogues ("static call chains");
5816 }
5817
5818 /* Finally, compute which registers the stub will manage. */
5819 else
5820 {
5821 unsigned count = xlogue_layout::count_stub_managed_regs ();
5822 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
5823 m->call_ms2sysv_pad_in = 0;
5824 }
5825 }
5826
5827 frame->nregs = ix86_nsaved_regs ();
5828 frame->nsseregs = ix86_nsaved_sseregs ();
5829
5830 /* The 64-bit MS ABI seems to require stack alignment to be always 16,
5831 except for function prologues, leaf functions and when the default
5832 incoming stack boundary is overridden on the command line or via the
5833 force_align_arg_pointer attribute.
5834
5835 Darwin's ABI specifies 128-bit alignment for both the 32- and 64-bit variants
5836 at call sites, including profile function calls.
5837 */
5838 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
5839 && crtl->preferred_stack_boundary < 128)
5840 && (!crtl->is_leaf || cfun->calls_alloca != 0
5841 || ix86_current_function_calls_tls_descriptor
5842 || (TARGET_MACHO && crtl->profile)
5843 || ix86_incoming_stack_boundary < 128))
5844 {
5845 crtl->preferred_stack_boundary = 128;
5846 crtl->stack_alignment_needed = 128;
5847 }
5848
5849 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
5850 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
5851
5852 gcc_assert (!size || stack_alignment_needed);
5853 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5854 gcc_assert (preferred_alignment <= stack_alignment_needed);
5855
5856 /* The only ABI that saves SSE regs should be the 64-bit ms_abi. */
5857 gcc_assert (TARGET_64BIT || !frame->nsseregs);
5858 if (TARGET_64BIT && m->call_ms2sysv)
5859 {
5860 gcc_assert (stack_alignment_needed >= 16);
5861 gcc_assert (!frame->nsseregs);
5862 }
5863
5864 /* For SEH we have to limit the amount of code movement into the prologue.
5865 At present we do this via a BLOCKAGE, at which point there's very little
5866 scheduling that can be done, which means that there's very little point
5867 in doing anything except PUSHs. */
5868 if (TARGET_SEH)
5869 m->use_fast_prologue_epilogue = false;
5870 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
5871 {
5872 int count = frame->nregs;
5873 struct cgraph_node *node = cgraph_node::get (current_function_decl);
5874
5875 /* The fast prologue uses move instead of push to save registers. This
5876 is significantly longer, but also executes faster as modern hardware
5877 can execute the moves in parallel, but can't do that for push/pop.
5878
5879 Be careful about choosing which prologue to emit: when the function
5880 takes many instructions to execute we may use the slow version, as
5881 well as when the function is known to lie outside a hot spot (this
5882 is known with feedback only). Weight the size of the function by the
5883 number of registers to save, as it is cheap to use one or two push
5884 instructions but very slow to use many of them. */
5885 if (count)
5886 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5887 if (node->frequency < NODE_FREQUENCY_NORMAL
5888 || (flag_branch_probabilities
5889 && node->frequency < NODE_FREQUENCY_HOT))
5890 m->use_fast_prologue_epilogue = false;
5891 else
5892 m->use_fast_prologue_epilogue
5893 = !expensive_function_p (count);
5894 }
5895
5896 frame->save_regs_using_mov
5897 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
5898 /* If static stack checking is enabled and done with probes,
5899 the registers need to be saved before allocating the frame. */
5900 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
5901
5902 /* Skip return address and error code in exception handler. */
5903 offset = INCOMING_FRAME_SP_OFFSET;
5904
5905 /* Skip pushed static chain. */
5906 if (ix86_static_chain_on_stack)
5907 offset += UNITS_PER_WORD;
5908
5909 /* Skip saved base pointer. */
5910 if (frame_pointer_needed)
5911 offset += UNITS_PER_WORD;
5912 frame->hfp_save_offset = offset;
5913
5914 /* The traditional frame pointer location is at the top of the frame. */
5915 frame->hard_frame_pointer_offset = offset;
5916
5917 /* Register save area */
5918 offset += frame->nregs * UNITS_PER_WORD;
5919 frame->reg_save_offset = offset;
5920
5921 /* On SEH target, registers are pushed just before the frame pointer
5922 location. */
5923 if (TARGET_SEH)
5924 frame->hard_frame_pointer_offset = offset;
5925
5926 /* Calculate the size of the va-arg area (not including padding, if any). */
5927 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
5928
5929 /* Also adjust stack_realign_offset for the largest alignment of
5930 stack slot actually used. */
5931 if (stack_realign_fp
5932 || (cfun->machine->max_used_stack_alignment != 0
5933 && (offset % cfun->machine->max_used_stack_alignment) != 0))
5934 {
5935 /* We may need a 16-byte aligned stack for the remainder of the
5936 register save area, but the stack frame for the local function
5937 may require a greater alignment if using AVX/2/512. In order
5938 to avoid wasting space, we first calculate the space needed for
5939 the rest of the register saves, add that to the stack pointer,
5940 and then realign the stack to the boundary of the start of the
5941 frame for the local function. */
5942 HOST_WIDE_INT space_needed = 0;
5943 HOST_WIDE_INT sse_reg_space_needed = 0;
5944
5945 if (TARGET_64BIT)
5946 {
5947 if (m->call_ms2sysv)
5948 {
5949 m->call_ms2sysv_pad_in = 0;
5950 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
5951 }
5952
5953 else if (frame->nsseregs)
5954 /* The only ABI that has saved SSE registers (Win64) also has a
5955 16-byte aligned default stack. However, many programs violate
5956 the ABI, and Wine64 forces stack realignment to compensate. */
5957 space_needed = frame->nsseregs * 16;
5958
5959 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
5960
5961 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
5962 rounding to be pedantic. */
5963 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
5964 }
5965 else
5966 space_needed = frame->va_arg_size;
5967
5968 /* Record the allocation size required prior to the realignment AND. */
5969 frame->stack_realign_allocate = space_needed;
5970
5971 /* The re-aligned stack starts at frame->stack_realign_offset. Values
5972 before this point are not directly comparable with values below
5973 this point. Use sp_valid_at to determine if the stack pointer is
5974 valid for a given offset, fp_valid_at for the frame pointer, or
5975 choose_baseaddr to have a base register chosen for you.
5976
5977 Note that the result of (frame->stack_realign_offset
5978 & (stack_alignment_needed - 1)) may not equal zero. */
5979 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
5980 frame->stack_realign_offset = offset - space_needed;
5981 frame->sse_reg_save_offset = frame->stack_realign_offset
5982 + sse_reg_space_needed;
5983 }
5984 else
5985 {
5986 frame->stack_realign_offset = offset;
5987
5988 if (TARGET_64BIT && m->call_ms2sysv)
5989 {
5990 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
5991 offset += xlogue_layout::get_instance ().get_stack_space_used ();
5992 }
5993
5994 /* Align and set SSE register save area. */
5995 else if (frame->nsseregs)
5996 {
5997 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
5998 required and the DRAP re-alignment boundary is at least 16 bytes,
5999 then we want the SSE register save area properly aligned. */
6000 if (ix86_incoming_stack_boundary >= 128
6001 || (stack_realign_drap && stack_alignment_needed >= 16))
6002 offset = ROUND_UP (offset, 16);
6003 offset += frame->nsseregs * 16;
6004 }
6005 frame->sse_reg_save_offset = offset;
6006 offset += frame->va_arg_size;
6007 }
6008
6009 /* Align start of frame for local function. When a function call
6010 is removed, it may become a leaf function. But if arguments may
6011 be passed on the stack, we still need to align the stack when
6012 there is no tail call. */
6013 if (m->call_ms2sysv
6014 || frame->va_arg_size != 0
6015 || size != 0
6016 || !crtl->is_leaf
6017 || (!crtl->tail_call_emit
6018 && cfun->machine->outgoing_args_on_stack)
6019 || cfun->calls_alloca
6020 || ix86_current_function_calls_tls_descriptor)
6021 offset = ROUND_UP (offset, stack_alignment_needed);
6022
6023 /* Frame pointer points here. */
6024 frame->frame_pointer_offset = offset;
6025
6026 offset += size;
6027
6028 /* Add outgoing arguments area. Can be skipped if we eliminated
6029 all the function calls as dead code.
6030 Skipping is however impossible when the function calls alloca. The
6031 alloca expander assumes that the last crtl->outgoing_args_size bytes
6032 of the stack frame are unused. */
6033 if (ACCUMULATE_OUTGOING_ARGS
6034 && (!crtl->is_leaf || cfun->calls_alloca
6035 || ix86_current_function_calls_tls_descriptor))
6036 {
6037 offset += crtl->outgoing_args_size;
6038 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6039 }
6040 else
6041 frame->outgoing_arguments_size = 0;
6042
6043 /* Align stack boundary. Only needed if we're calling another function
6044 or using alloca. */
6045 if (!crtl->is_leaf || cfun->calls_alloca
6046 || ix86_current_function_calls_tls_descriptor)
6047 offset = ROUND_UP (offset, preferred_alignment);
6048
6049 /* We've reached end of stack frame. */
6050 frame->stack_pointer_offset = offset;
6051
6052 /* Size prologue needs to allocate. */
6053 to_allocate = offset - frame->sse_reg_save_offset;
6054
6055 if ((!to_allocate && frame->nregs <= 1)
6056 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6057 /* If stack clash probing needs a loop, then it needs a
6058 scratch register. But the returned register is only guaranteed
6059 to be safe to use after register saves are complete. So if
6060 stack clash protections are enabled and the allocated frame is
6061 larger than the probe interval, then use pushes to save
6062 callee saved registers. */
6063 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6064 frame->save_regs_using_mov = false;
6065
6066 if (ix86_using_red_zone ()
6067 && crtl->sp_is_unchanging
6068 && crtl->is_leaf
6069 && !ix86_pc_thunk_call_expanded
6070 && !ix86_current_function_calls_tls_descriptor)
6071 {
6072 frame->red_zone_size = to_allocate;
6073 if (frame->save_regs_using_mov)
6074 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6075 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6076 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6077 }
6078 else
6079 frame->red_zone_size = 0;
6080 frame->stack_pointer_offset -= frame->red_zone_size;
6081
6082 /* The SEH frame pointer location is near the bottom of the frame.
6083 This is enforced by the fact that the difference between the
6084 stack pointer and the frame pointer is limited to 240 bytes in
6085 the unwind data structure. */
6086 if (TARGET_SEH)
6087 {
6088 HOST_WIDE_INT diff;
6089
6090 /* If we can leave the frame pointer where it is, do so. Also, returns
6091 the establisher frame for __builtin_frame_address (0). */
6092 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6093 if (diff <= SEH_MAX_FRAME_SIZE
6094 && (diff > 240 || (diff & 15) != 0)
6095 && !crtl->accesses_prior_frames)
6096 {
6097 /* Ideally we'd determine what portion of the local stack frame
6098 (within the constraint of the lowest 240) is most heavily used.
6099 But without that complication, simply bias the frame pointer
6100 by 128 bytes so as to maximize the amount of the local stack
6101 frame that is addressable with 8-bit offsets. */
6102 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6103 }
6104 }
6105 }
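
/* To summarize, the computation above accumulates OFFSET from the top of the
   frame downwards roughly in this order; this is only a sketch, since the
   exact values depend on the ABI, on SEH, and on the alignment rounding
   applied along the way:

     return address (and error code in exception handlers)
     pushed static chain, if any
     saved hard frame pointer, if needed      -> hard_frame_pointer_offset
     general register save area               -> reg_save_offset
     SSE register save area / realignment     -> sse_reg_save_offset
     va_arg register save area
     alignment padding                        -> frame_pointer_offset
     local variables
     outgoing argument area                   -> stack_pointer_offset
     red zone, if usable (subtracted from stack_pointer_offset).  */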
6106
6107 /* This is semi-inlined memory_address_length, but simplified
6108 since we know that we're always dealing with reg+offset, and
6109 to avoid having to create and discard all that rtl. */
6110
6111 static inline int
6112 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6113 {
6114 int len = 4;
6115
6116 if (offset == 0)
6117 {
6118 /* EBP and R13 cannot be encoded without an offset. */
6119 len = (regno == BP_REG || regno == R13_REG);
6120 }
6121 else if (IN_RANGE (offset, -128, 127))
6122 len = 1;
6123
6124 /* ESP and R12 must be encoded with a SIB byte. */
6125 if (regno == SP_REG || regno == R12_REG)
6126 len++;
6127
6128 return len;
6129 }
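
/* A few concrete cases of the above, by way of illustration:

     (%rbx)       regno = BX_REG, offset = 0      -> 0 bytes
     (%rbp)       must be emitted as 0(%rbp)      -> 1 byte (disp8 of zero)
     16(%rax)     disp8                           -> 1 byte
     16(%rsp)     disp8 plus the mandatory SIB    -> 2 bytes
     1024(%rax)   disp32                          -> 4 bytes
     1024(%r12)   disp32 plus the mandatory SIB   -> 5 bytes  */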
6130
6131 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6132 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6133
6134 static bool
6135 sp_valid_at (HOST_WIDE_INT cfa_offset)
6136 {
6137 const struct machine_frame_state &fs = cfun->machine->fs;
6138 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6139 {
6140 /* Validate that the cfa_offset isn't in a "no-man's land". */
6141 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6142 return false;
6143 }
6144 return fs.sp_valid;
6145 }
6146
6147 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6148 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6149
6150 static inline bool
6151 fp_valid_at (HOST_WIDE_INT cfa_offset)
6152 {
6153 const struct machine_frame_state &fs = cfun->machine->fs;
6154 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6155 {
6156 /* Validate that the cfa_offset isn't in a "no-man's land". */
6157 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6158 return false;
6159 }
6160 return fs.fp_valid;
6161 }
6162
6163 /* Choose a base register based upon alignment requested, speed and/or
6164 size. */
6165
6166 static void
6167 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6168 HOST_WIDE_INT &base_offset,
6169 unsigned int align_requested, unsigned int *align)
6170 {
6171 const struct machine_function *m = cfun->machine;
6172 unsigned int hfp_align;
6173 unsigned int drap_align;
6174 unsigned int sp_align;
6175 bool hfp_ok = fp_valid_at (cfa_offset);
6176 bool drap_ok = m->fs.drap_valid;
6177 bool sp_ok = sp_valid_at (cfa_offset);
6178
6179 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6180
6181 /* Filter out any registers that don't meet the requested alignment
6182 criteria. */
6183 if (align_requested)
6184 {
6185 if (m->fs.realigned)
6186 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
6187 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
6188 notes (which we would need in order to use a realigned stack
6189 pointer), so disable this on SEH targets. */
6190 else if (m->fs.sp_realigned)
6191 sp_align = crtl->stack_alignment_needed;
6192
6193 hfp_ok = hfp_ok && hfp_align >= align_requested;
6194 drap_ok = drap_ok && drap_align >= align_requested;
6195 sp_ok = sp_ok && sp_align >= align_requested;
6196 }
6197
6198 if (m->use_fast_prologue_epilogue)
6199 {
6200 /* Choose the base register most likely to allow the most scheduling
6201 opportunities. Generally FP is valid throughout the function,
6202 while DRAP must be reloaded within the epilogue. But choose either
6203 over the SP due to increased encoding size. */
6204
6205 if (hfp_ok)
6206 {
6207 base_reg = hard_frame_pointer_rtx;
6208 base_offset = m->fs.fp_offset - cfa_offset;
6209 }
6210 else if (drap_ok)
6211 {
6212 base_reg = crtl->drap_reg;
6213 base_offset = 0 - cfa_offset;
6214 }
6215 else if (sp_ok)
6216 {
6217 base_reg = stack_pointer_rtx;
6218 base_offset = m->fs.sp_offset - cfa_offset;
6219 }
6220 }
6221 else
6222 {
6223 HOST_WIDE_INT toffset;
6224 int len = 16, tlen;
6225
6226 /* Choose the base register with the smallest address encoding.
6227 With a tie, choose FP > DRAP > SP. */
6228 if (sp_ok)
6229 {
6230 base_reg = stack_pointer_rtx;
6231 base_offset = m->fs.sp_offset - cfa_offset;
6232 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
6233 }
6234 if (drap_ok)
6235 {
6236 toffset = 0 - cfa_offset;
6237 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
6238 if (tlen <= len)
6239 {
6240 base_reg = crtl->drap_reg;
6241 base_offset = toffset;
6242 len = tlen;
6243 }
6244 }
6245 if (hfp_ok)
6246 {
6247 toffset = m->fs.fp_offset - cfa_offset;
6248 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
6249 if (tlen <= len)
6250 {
6251 base_reg = hard_frame_pointer_rtx;
6252 base_offset = toffset;
6253 }
6254 }
6255 }
6256
6257 /* Set the align return value. */
6258 if (align)
6259 {
6260 if (base_reg == stack_pointer_rtx)
6261 *align = sp_align;
6262 else if (base_reg == crtl->drap_reg)
6263 *align = drap_align;
6264 else if (base_reg == hard_frame_pointer_rtx)
6265 *align = hfp_align;
6266 }
6267 }
6268
6269 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6270 the alignment of the address. If ALIGN is non-null, it should point
6271 to an alignment value (in bits) that is preferred or zero and will
6272 receive the alignment of the base register that was selected,
6273 irrespective of whether or not CFA_OFFSET is a multiple of that
6274 alignment value. If it is possible for the base register offset to be
6275 non-immediate then SCRATCH_REGNO should specify a scratch register to
6276 use.
6277
6278 The valid base registers are taken from CFUN->MACHINE->FS. */
6279
6280 static rtx
6281 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
6282 unsigned int scratch_regno = INVALID_REGNUM)
6283 {
6284 rtx base_reg = NULL;
6285 HOST_WIDE_INT base_offset = 0;
6286
6287 /* If a specific alignment is requested, try to get a base register
6288 with that alignment first. */
6289 if (align && *align)
6290 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
6291
6292 if (!base_reg)
6293 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
6294
6295 gcc_assert (base_reg != NULL);
6296
6297 rtx base_offset_rtx = GEN_INT (base_offset);
6298
6299 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
6300 {
6301 gcc_assert (scratch_regno != INVALID_REGNUM);
6302
6303 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
6304 emit_move_insn (scratch_reg, base_offset_rtx);
6305
6306 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
6307 }
6308
6309 return plus_constant (Pmode, base_reg, base_offset);
6310 }
6311
6312 /* Emit code to save registers in the prologue. */
6313
6314 static void
6315 ix86_emit_save_regs (void)
6316 {
6317 unsigned int regno;
6318 rtx_insn *insn;
6319
6320 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
6321 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6322 {
6323 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
6324 RTX_FRAME_RELATED_P (insn) = 1;
6325 }
6326 }
6327
6328 /* Emit a single register save at CFA - CFA_OFFSET. */
6329
6330 static void
6331 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
6332 HOST_WIDE_INT cfa_offset)
6333 {
6334 struct machine_function *m = cfun->machine;
6335 rtx reg = gen_rtx_REG (mode, regno);
6336 rtx mem, addr, base, insn;
6337 unsigned int align = GET_MODE_ALIGNMENT (mode);
6338
6339 addr = choose_baseaddr (cfa_offset, &align);
6340 mem = gen_frame_mem (mode, addr);
6341
6342 /* The location alignment depends upon the base register. */
6343 align = MIN (GET_MODE_ALIGNMENT (mode), align);
6344 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
6345 set_mem_align (mem, align);
6346
6347 insn = emit_insn (gen_rtx_SET (mem, reg));
6348 RTX_FRAME_RELATED_P (insn) = 1;
6349
6350 base = addr;
6351 if (GET_CODE (base) == PLUS)
6352 base = XEXP (base, 0);
6353 gcc_checking_assert (REG_P (base));
6354
6355 /* When saving registers into a re-aligned local stack frame, avoid
6356 any tricky guessing by dwarf2out. */
6357 if (m->fs.realigned)
6358 {
6359 gcc_checking_assert (stack_realign_drap);
6360
6361 if (regno == REGNO (crtl->drap_reg))
6362 {
6363 /* A bit of a hack. We force the DRAP register to be saved in
6364 the re-aligned stack frame, which provides us with a copy
6365 of the CFA that will last past the prologue. Install it. */
6366 gcc_checking_assert (cfun->machine->fs.fp_valid);
6367 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6368 cfun->machine->fs.fp_offset - cfa_offset);
6369 mem = gen_rtx_MEM (mode, addr);
6370 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
6371 }
6372 else
6373 {
6374 /* The frame pointer is a stable reference within the
6375 aligned frame. Use it. */
6376 gcc_checking_assert (cfun->machine->fs.fp_valid);
6377 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6378 cfun->machine->fs.fp_offset - cfa_offset);
6379 mem = gen_rtx_MEM (mode, addr);
6380 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6381 }
6382 }
6383
6384 else if (base == stack_pointer_rtx && m->fs.sp_realigned
6385 && cfa_offset >= m->fs.sp_realigned_offset)
6386 {
6387 gcc_checking_assert (stack_realign_fp);
6388 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6389 }
6390
6391 /* The memory may not be relative to the current CFA register,
6392 which means that we may need to generate a new pattern for
6393 use by the unwind info. */
6394 else if (base != m->fs.cfa_reg)
6395 {
6396 addr = plus_constant (Pmode, m->fs.cfa_reg,
6397 m->fs.cfa_offset - cfa_offset);
6398 mem = gen_rtx_MEM (mode, addr);
6399 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6400 }
6401 }
6402
6403 /* Emit code to save registers using MOV insns.
6404 First register is stored at CFA - CFA_OFFSET. */
6405 static void
6406 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
6407 {
6408 unsigned int regno;
6409
6410 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6411 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6412 {
6413 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
6414 cfa_offset -= UNITS_PER_WORD;
6415 }
6416 }
6417
6418 /* Emit code to save SSE registers using MOV insns.
6419 First register is stored at CFA - CFA_OFFSET. */
6420 static void
6421 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
6422 {
6423 unsigned int regno;
6424
6425 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6426 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6427 {
6428 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
6429 cfa_offset -= GET_MODE_SIZE (V4SFmode);
6430 }
6431 }
6432
6433 static GTY(()) rtx queued_cfa_restores;
6434
6435 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
6436 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
6437 Don't add the note if the previously saved value will be left untouched
6438 within the stack red zone until return, as unwinders can find the same value
6439 in the register and on the stack. */
6440
6441 static void
6442 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
6443 {
6444 if (!crtl->shrink_wrapped
6445 && cfa_offset <= cfun->machine->fs.red_zone_offset)
6446 return;
6447
6448 if (insn)
6449 {
6450 add_reg_note (insn, REG_CFA_RESTORE, reg);
6451 RTX_FRAME_RELATED_P (insn) = 1;
6452 }
6453 else
6454 queued_cfa_restores
6455 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
6456 }
6457
6458 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
6459
6460 static void
6461 ix86_add_queued_cfa_restore_notes (rtx insn)
6462 {
6463 rtx last;
6464 if (!queued_cfa_restores)
6465 return;
6466 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
6467 ;
6468 XEXP (last, 1) = REG_NOTES (insn);
6469 REG_NOTES (insn) = queued_cfa_restores;
6470 queued_cfa_restores = NULL_RTX;
6471 RTX_FRAME_RELATED_P (insn) = 1;
6472 }
6473
6474 /* Expand prologue or epilogue stack adjustment.
6475 The pattern exists to put a dependency on all ebp-based memory accesses.
6476 STYLE should be negative if instructions should be marked as frame related,
6477 zero if the %r11 register is live and cannot be freely used, and positive
6478 otherwise. */
6479
6480 static rtx
6481 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
6482 int style, bool set_cfa)
6483 {
6484 struct machine_function *m = cfun->machine;
6485 rtx addend = offset;
6486 rtx insn;
6487 bool add_frame_related_expr = false;
6488
6489 if (!x86_64_immediate_operand (offset, Pmode))
6490 {
6491 /* r11 is used by indirect sibcall return as well, set before the
6492 epilogue and used after the epilogue. */
6493 if (style)
6494 addend = gen_rtx_REG (Pmode, R11_REG);
6495 else
6496 {
6497 gcc_assert (src != hard_frame_pointer_rtx
6498 && dest != hard_frame_pointer_rtx);
6499 addend = hard_frame_pointer_rtx;
6500 }
6501 emit_insn (gen_rtx_SET (addend, offset));
6502 if (style < 0)
6503 add_frame_related_expr = true;
6504 }
6505
6506 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
6507 (Pmode, dest, src, addend));
6508 if (style >= 0)
6509 ix86_add_queued_cfa_restore_notes (insn);
6510
6511 if (set_cfa)
6512 {
6513 rtx r;
6514
6515 gcc_assert (m->fs.cfa_reg == src);
6516 m->fs.cfa_offset += INTVAL (offset);
6517 m->fs.cfa_reg = dest;
6518
6519 r = gen_rtx_PLUS (Pmode, src, offset);
6520 r = gen_rtx_SET (dest, r);
6521 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
6522 RTX_FRAME_RELATED_P (insn) = 1;
6523 }
6524 else if (style < 0)
6525 {
6526 RTX_FRAME_RELATED_P (insn) = 1;
6527 if (add_frame_related_expr)
6528 {
6529 rtx r = gen_rtx_PLUS (Pmode, src, offset);
6530 r = gen_rtx_SET (dest, r);
6531 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
6532 }
6533 }
6534
6535 if (dest == stack_pointer_rtx)
6536 {
6537 HOST_WIDE_INT ooffset = m->fs.sp_offset;
6538 bool valid = m->fs.sp_valid;
6539 bool realigned = m->fs.sp_realigned;
6540
6541 if (src == hard_frame_pointer_rtx)
6542 {
6543 valid = m->fs.fp_valid;
6544 realigned = false;
6545 ooffset = m->fs.fp_offset;
6546 }
6547 else if (src == crtl->drap_reg)
6548 {
6549 valid = m->fs.drap_valid;
6550 realigned = false;
6551 ooffset = 0;
6552 }
6553 else
6554 {
6555 /* Else there are two possibilities: SP itself, which we set
6556 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
6557 taken care of by hand along the eh_return path. */
6558 gcc_checking_assert (src == stack_pointer_rtx
6559 || offset == const0_rtx);
6560 }
6561
6562 m->fs.sp_offset = ooffset - INTVAL (offset);
6563 m->fs.sp_valid = valid;
6564 m->fs.sp_realigned = realigned;
6565 }
6566 return insn;
6567 }
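
/* A typical prologue-side use of the routine above, as seen further down in
   this file, allocates SIZE bytes and marks the insns as frame related via
   the negative STYLE argument:

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-size), -1,
				m->fs.cfa_reg == stack_pointer_rtx);  */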
6568
6569 /* Find an available register to be used as dynamic realign argument
6570 pointer register. Such a register will be written in the prologue
6571 and used at the beginning of the body, so it must not be
6572 1. a parameter-passing register.
6573 2. the GOT pointer.
6574 We reuse the static-chain register if it is available. Otherwise,
6575 we use DI for i386 and R13 for x86-64. We chose R13 since it has a
6576 shorter encoding.
6577
6578 Return: the regno of chosen register. */
6579
6580 static unsigned int
6581 find_drap_reg (void)
6582 {
6583 tree decl = cfun->decl;
6584
6585 /* Always use callee-saved register if there are no caller-saved
6586 registers. */
6587 if (TARGET_64BIT)
6588 {
6589 /* Use R13 for a nested function or a function that needs a static
6590 chain. Since a function with a tail call may use any caller-saved
6591 register in the epilogue, DRAP must not use a caller-saved
6592 register in that case. */
6593 if (DECL_STATIC_CHAIN (decl)
6594 || cfun->machine->no_caller_saved_registers
6595 || crtl->tail_call_emit)
6596 return R13_REG;
6597
6598 return R10_REG;
6599 }
6600 else
6601 {
6602 /* Use DI for a nested function or a function that needs a static
6603 chain. Since a function with a tail call may use any caller-saved
6604 register in the epilogue, DRAP must not use a caller-saved
6605 register in that case. */
6606 if (DECL_STATIC_CHAIN (decl)
6607 || cfun->machine->no_caller_saved_registers
6608 || crtl->tail_call_emit)
6609 return DI_REG;
6610
6611 /* Reuse static chain register if it isn't used for parameter
6612 passing. */
6613 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
6614 {
6615 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
6616 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
6617 return CX_REG;
6618 }
6619 return DI_REG;
6620 }
6621 }
6622
6623 /* Return minimum incoming stack alignment. */
6624
6625 static unsigned int
6626 ix86_minimum_incoming_stack_boundary (bool sibcall)
6627 {
6628 unsigned int incoming_stack_boundary;
6629
6630 /* The stack of an interrupt handler is aligned to 128 bits in 64-bit mode. */
6631 if (cfun->machine->func_type != TYPE_NORMAL)
6632 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
6633 /* Prefer the one specified at command line. */
6634 else if (ix86_user_incoming_stack_boundary)
6635 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
6636 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
6637 boundary when -mstackrealign is used, this isn't a sibcall check,
6638 and the estimated stack alignment is 128 bits. */
6639 else if (!sibcall
6640 && ix86_force_align_arg_pointer
6641 && crtl->stack_alignment_estimated == 128)
6642 incoming_stack_boundary = MIN_STACK_BOUNDARY;
6643 else
6644 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
6645
6646 /* Incoming stack alignment can be changed on individual functions
6647 via force_align_arg_pointer attribute. We use the smallest
6648 incoming stack boundary. */
6649 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
6650 && lookup_attribute ("force_align_arg_pointer",
6651 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6652 incoming_stack_boundary = MIN_STACK_BOUNDARY;
6653
6654 /* The incoming stack frame has to be aligned at least at
6655 parm_stack_boundary. */
6656 if (incoming_stack_boundary < crtl->parm_stack_boundary)
6657 incoming_stack_boundary = crtl->parm_stack_boundary;
6658
6659 /* The stack at the entry of main is aligned by the runtime. We use the
6660 smallest incoming stack boundary. */
6661 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
6662 && DECL_NAME (current_function_decl)
6663 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6664 && DECL_FILE_SCOPE_P (current_function_decl))
6665 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
6666
6667 return incoming_stack_boundary;
6668 }
6669
6670 /* Update incoming stack boundary and estimated stack alignment. */
6671
6672 static void
6673 ix86_update_stack_boundary (void)
6674 {
6675 ix86_incoming_stack_boundary
6676 = ix86_minimum_incoming_stack_boundary (false);
6677
6678 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
6679 if (TARGET_64BIT
6680 && cfun->stdarg
6681 && crtl->stack_alignment_estimated < 128)
6682 crtl->stack_alignment_estimated = 128;
6683
6684 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
6685 if (ix86_tls_descriptor_calls_expanded_in_cfun
6686 && crtl->preferred_stack_boundary < 128)
6687 crtl->preferred_stack_boundary = 128;
6688 }
6689
6690 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
6691 needed or an rtx for DRAP otherwise. */
6692
6693 static rtx
6694 ix86_get_drap_rtx (void)
6695 {
6696 /* We must use DRAP if there are outgoing arguments on stack and
6697 ACCUMULATE_OUTGOING_ARGS is false. */
6698 if (ix86_force_drap
6699 || (cfun->machine->outgoing_args_on_stack
6700 && !ACCUMULATE_OUTGOING_ARGS))
6701 crtl->need_drap = true;
6702
6703 if (stack_realign_drap)
6704 {
6705 /* Assign DRAP to vDRAP and return vDRAP. */
6706 unsigned int regno = find_drap_reg ();
6707 rtx drap_vreg;
6708 rtx arg_ptr;
6709 rtx_insn *seq, *insn;
6710
6711 arg_ptr = gen_rtx_REG (Pmode, regno);
6712 crtl->drap_reg = arg_ptr;
6713
6714 start_sequence ();
6715 drap_vreg = copy_to_reg (arg_ptr);
6716 seq = get_insns ();
6717 end_sequence ();
6718
6719 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
6720 if (!optimize)
6721 {
6722 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
6723 RTX_FRAME_RELATED_P (insn) = 1;
6724 }
6725 return drap_vreg;
6726 }
6727 else
6728 return NULL;
6729 }
6730
6731 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6732
6733 static rtx
6734 ix86_internal_arg_pointer (void)
6735 {
6736 return virtual_incoming_args_rtx;
6737 }
6738
6739 struct scratch_reg {
6740 rtx reg;
6741 bool saved;
6742 };
6743
6744 /* Return a short-lived scratch register for use on function entry.
6745 In 32-bit mode, it is valid only after the registers are saved
6746 in the prologue. This register must be released by means of
6747 release_scratch_register_on_entry once it is dead. */
6748
6749 static void
6750 get_scratch_register_on_entry (struct scratch_reg *sr)
6751 {
6752 int regno;
6753
6754 sr->saved = false;
6755
6756 if (TARGET_64BIT)
6757 {
6758 /* We always use R11 in 64-bit mode. */
6759 regno = R11_REG;
6760 }
6761 else
6762 {
6763 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
6764 bool fastcall_p
6765 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
6766 bool thiscall_p
6767 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
6768 bool static_chain_p = DECL_STATIC_CHAIN (decl);
6769 int regparm = ix86_function_regparm (fntype, decl);
6770 int drap_regno
6771 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
6772
6773 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
6774 for the static chain register. */
6775 if ((regparm < 1 || (fastcall_p && !static_chain_p))
6776 && drap_regno != AX_REG)
6777 regno = AX_REG;
6778 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
6779 for the static chain register. */
6780 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
6781 regno = AX_REG;
6782 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
6783 regno = DX_REG;
6784 /* ecx is the static chain register. */
6785 else if (regparm < 3 && !fastcall_p && !thiscall_p
6786 && !static_chain_p
6787 && drap_regno != CX_REG)
6788 regno = CX_REG;
6789 else if (ix86_save_reg (BX_REG, true, false))
6790 regno = BX_REG;
6791 /* esi is the static chain register. */
6792 else if (!(regparm == 3 && static_chain_p)
6793 && ix86_save_reg (SI_REG, true, false))
6794 regno = SI_REG;
6795 else if (ix86_save_reg (DI_REG, true, false))
6796 regno = DI_REG;
6797 else
6798 {
6799 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
6800 sr->saved = true;
6801 }
6802 }
6803
6804 sr->reg = gen_rtx_REG (Pmode, regno);
6805 if (sr->saved)
6806 {
6807 rtx_insn *insn = emit_insn (gen_push (sr->reg));
6808 RTX_FRAME_RELATED_P (insn) = 1;
6809 }
6810 }
6811
6812 /* Release a scratch register obtained from the preceding function.
6813
6814 If RELEASE_VIA_POP is true, we just pop the register off the stack
6815 to release it. This is what non-Linux systems use with -fstack-check.
6816
6817 Otherwise we use OFFSET to locate the saved register and the
6818 allocated stack space becomes part of the local frame and is
6819 deallocated by the epilogue. */
6820
6821 static void
6822 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
6823 bool release_via_pop)
6824 {
6825 if (sr->saved)
6826 {
6827 if (release_via_pop)
6828 {
6829 struct machine_function *m = cfun->machine;
6830 rtx x, insn = emit_insn (gen_pop (sr->reg));
6831
6832 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
6833 RTX_FRAME_RELATED_P (insn) = 1;
6834 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
6835 x = gen_rtx_SET (stack_pointer_rtx, x);
6836 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
6837 m->fs.sp_offset -= UNITS_PER_WORD;
6838 }
6839 else
6840 {
6841 rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
6842 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
6843 emit_insn (x);
6844 }
6845 }
6846 }
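
/* The two routines above are always used as a pair; a rough sketch of the
   pattern used by the stack probing code below:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit probing code that may clobber sr.reg ...
     release_scratch_register_on_entry (&sr, size, false);  */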
6847
6848 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
6849
6850 This differs from the next routine in that it tries hard to prevent
6851 attacks that jump the stack guard. Thus it is never allowed to allocate
6852 more than PROBE_INTERVAL bytes of stack space without a suitable
6853 probe.
6854
6855 INT_REGISTERS_SAVED is true if integer registers have already been
6856 pushed on the stack. */
6857
6858 static void
6859 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
6860 const bool int_registers_saved)
6861 {
6862 struct machine_function *m = cfun->machine;
6863
6864 /* If this function does not statically allocate stack space, then
6865 no probes are needed. */
6866 if (!size)
6867 {
6868 /* However, the allocation of space via pushes for register
6869 saves could be viewed as allocating space, but without the
6870 need to probe. */
6871 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
6872 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
6873 else
6874 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
6875 return;
6876 }
6877
6878 /* If we are a noreturn function, then we have to consider the
6879 possibility that we're called via a jump rather than a call.
6880
6881 Thus we don't have the implicit probe generated by saving the
6882 return address into the stack at the call. Thus, the stack
6883 pointer could be anywhere in the guard page. The safe thing
6884 to do is emit a probe now.
6885
6886 The probe can be avoided if we have already emitted any callee
6887 register saves into the stack or have a frame pointer (which will
6888 have been saved as well). Those saves will function as implicit
6889 probes.
6890
6891 ?!? This should be revamped to work like aarch64 and s390 where
6892 we track the offset from the most recent probe. Normally that
6893 offset would be zero. For a noreturn function we would reset
6894 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
6895 we just probe when we cross PROBE_INTERVAL. */
6896 if (TREE_THIS_VOLATILE (cfun->decl)
6897 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
6898 {
6899 /* We can safely use any register here since we're just going to push
6900 its value and immediately pop it back. But we do try and avoid
6901 argument passing registers so as not to introduce dependencies in
6902 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
6903 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
6904 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
6905 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
6906 m->fs.sp_offset -= UNITS_PER_WORD;
6907 if (m->fs.cfa_reg == stack_pointer_rtx)
6908 {
6909 m->fs.cfa_offset -= UNITS_PER_WORD;
6910 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
6911 x = gen_rtx_SET (stack_pointer_rtx, x);
6912 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
6913 RTX_FRAME_RELATED_P (insn_push) = 1;
6914 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6915 x = gen_rtx_SET (stack_pointer_rtx, x);
6916 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
6917 RTX_FRAME_RELATED_P (insn_pop) = 1;
6918 }
6919 emit_insn (gen_blockage ());
6920 }
6921
6922 /* If we allocate less than the size of the guard statically,
6923 then no probing is necessary, but we do need to allocate
6924 the stack. */
6925 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
6926 {
6927 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6928 GEN_INT (-size), -1,
6929 m->fs.cfa_reg == stack_pointer_rtx);
6930 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
6931 return;
6932 }
6933
6934 /* We're allocating a large enough stack frame that we need to
6935 emit probes. Either emit them inline or in a loop depending
6936 on the size. */
6937 HOST_WIDE_INT probe_interval = get_probe_interval ();
6938 if (size <= 4 * probe_interval)
6939 {
6940 HOST_WIDE_INT i;
6941 for (i = probe_interval; i <= size; i += probe_interval)
6942 {
6943 /* Allocate PROBE_INTERVAL bytes. */
6944 rtx insn
6945 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6946 GEN_INT (-probe_interval), -1,
6947 m->fs.cfa_reg == stack_pointer_rtx);
6948 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
6949
6950 /* And probe at *sp. */
6951 emit_stack_probe (stack_pointer_rtx);
6952 emit_insn (gen_blockage ());
6953 }
6954
6955 /* We need to allocate space for the residual, but we do not need
6956 to probe the residual. */
6957 HOST_WIDE_INT residual = (i - probe_interval - size);
6958 if (residual)
6959 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6960 GEN_INT (residual), -1,
6961 m->fs.cfa_reg == stack_pointer_rtx);
6962 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
6963 }
6964 else
6965 {
6966 /* We expect the GP registers to be saved when probes are used
6967 as the probing sequences might need a scratch register and
6968 the routine to allocate one assumes the integer registers
6969 have already been saved. */
6970 gcc_assert (int_registers_saved);
6971
6972 struct scratch_reg sr;
6973 get_scratch_register_on_entry (&sr);
6974
6975 /* If we needed to save a register, then account for any space
6976 that was pushed (we are not going to pop the register when
6977 we do the restore). */
6978 if (sr.saved)
6979 size -= UNITS_PER_WORD;
6980
6981 /* Step 1: round SIZE down to a multiple of the interval. */
6982 HOST_WIDE_INT rounded_size = size & -probe_interval;
6983
6984 /* Step 2: compute final value of the loop counter. Use lea if
6985 possible. */
6986 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
6987 rtx insn;
6988 if (address_no_seg_operand (addr, Pmode))
6989 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
6990 else
6991 {
6992 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
6993 insn = emit_insn (gen_rtx_SET (sr.reg,
6994 gen_rtx_PLUS (Pmode, sr.reg,
6995 stack_pointer_rtx)));
6996 }
6997 if (m->fs.cfa_reg == stack_pointer_rtx)
6998 {
6999 add_reg_note (insn, REG_CFA_DEF_CFA,
7000 plus_constant (Pmode, sr.reg,
7001 m->fs.cfa_offset + rounded_size));
7002 RTX_FRAME_RELATED_P (insn) = 1;
7003 }
7004
7005 /* Step 3: the loop. */
7006 rtx size_rtx = GEN_INT (rounded_size);
7007 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7008 size_rtx));
7009 if (m->fs.cfa_reg == stack_pointer_rtx)
7010 {
7011 m->fs.cfa_offset += rounded_size;
7012 add_reg_note (insn, REG_CFA_DEF_CFA,
7013 plus_constant (Pmode, stack_pointer_rtx,
7014 m->fs.cfa_offset));
7015 RTX_FRAME_RELATED_P (insn) = 1;
7016 }
7017 m->fs.sp_offset += rounded_size;
7018 emit_insn (gen_blockage ());
7019
7020 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7021 is equal to ROUNDED_SIZE. */
7022
7023 if (size != rounded_size)
7024 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7025 GEN_INT (rounded_size - size), -1,
7026 m->fs.cfa_reg == stack_pointer_rtx);
7027 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7028
7029 /* This does not deallocate the space reserved for the scratch
7030 register. That will be deallocated in the epilogue. */
7031 release_scratch_register_on_entry (&sr, size, false);
7032 }
7033
7034 /* Make sure nothing is scheduled before we are done. */
7035 emit_insn (gen_blockage ());
7036 }
7037
7038 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7039
7040 INT_REGISTERS_SAVED is true if integer registers have already been
7041 pushed on the stack. */
7042
7043 static void
7044 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7045 const bool int_registers_saved)
7046 {
7047 /* We skip the probe for the first interval + a small dope of 4 words and
7048 probe that many bytes past the specified size to maintain a protection
7049 area at the bottom of the stack. */
7050 const int dope = 4 * UNITS_PER_WORD;
7051 rtx size_rtx = GEN_INT (size), last;
7052
7053 /* See if we have a constant small number of probes to generate. If so,
7054 that's the easy case. The run-time loop is made up of 9 insns in the
7055 generic case while the compile-time loop is made up of 3+2*(n-1) insns
7056 for n # of intervals. */
7057 if (size <= 4 * get_probe_interval ())
7058 {
7059 HOST_WIDE_INT i, adjust;
7060 bool first_probe = true;
7061
7062 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
7063 values of N from 1 until it exceeds SIZE. If only one probe is
7064 needed, this will not generate any code. Then adjust and probe
7065 to PROBE_INTERVAL + SIZE. */
7066 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7067 {
7068 if (first_probe)
7069 {
7070 adjust = 2 * get_probe_interval () + dope;
7071 first_probe = false;
7072 }
7073 else
7074 adjust = get_probe_interval ();
7075
7076 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7077 plus_constant (Pmode, stack_pointer_rtx,
7078 -adjust)));
7079 emit_stack_probe (stack_pointer_rtx);
7080 }
7081
7082 if (first_probe)
7083 adjust = size + get_probe_interval () + dope;
7084 else
7085 adjust = size + get_probe_interval () - i;
7086
7087 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7088 plus_constant (Pmode, stack_pointer_rtx,
7089 -adjust)));
7090 emit_stack_probe (stack_pointer_rtx);
7091
7092 /* Adjust back to account for the additional first interval. */
7093 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7094 plus_constant (Pmode, stack_pointer_rtx,
7095 (get_probe_interval ()
7096 + dope))));
7097 }
7098
7099 /* Otherwise, do the same as above, but in a loop. Note that we must be
7100 extra careful with variables wrapping around because we might be at
7101 the very top (or the very bottom) of the address space and we have
7102 to be able to handle this case properly; in particular, we use an
7103 equality test for the loop condition. */
7104 else
7105 {
7106 /* We expect the GP registers to be saved when probes are used
7107 as the probing sequences might need a scratch register and
7108 the routine to allocate one assumes the integer registers
7109 have already been saved. */
7110 gcc_assert (int_registers_saved);
7111
7112 HOST_WIDE_INT rounded_size;
7113 struct scratch_reg sr;
7114
7115 get_scratch_register_on_entry (&sr);
7116
7117 /* If we needed to save a register, then account for any space
7118 that was pushed (we are not going to pop the register when
7119 we do the restore). */
7120 if (sr.saved)
7121 size -= UNITS_PER_WORD;
7122
7123 /* Step 1: round SIZE to the previous multiple of the interval. */
7124
7125 rounded_size = ROUND_DOWN (size, get_probe_interval ());
7126
7127
7128 /* Step 2: compute initial and final value of the loop counter. */
7129
7130 /* SP = SP_0 + PROBE_INTERVAL. */
7131 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7132 plus_constant (Pmode, stack_pointer_rtx,
7133 - (get_probe_interval () + dope))));
7134
7135 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
7136 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
7137 emit_insn (gen_rtx_SET (sr.reg,
7138 plus_constant (Pmode, stack_pointer_rtx,
7139 -rounded_size)));
7140 else
7141 {
7142 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7143 emit_insn (gen_rtx_SET (sr.reg,
7144 gen_rtx_PLUS (Pmode, sr.reg,
7145 stack_pointer_rtx)));
7146 }
7147
7148
7149 /* Step 3: the loop
7150
7151 do
7152 {
7153 SP = SP + PROBE_INTERVAL
7154 probe at SP
7155 }
7156 while (SP != LAST_ADDR)
7157
7158 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
7159 values of N from 1 until it is equal to ROUNDED_SIZE. */
7160
7161 emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg, size_rtx));
7162
7163
7164 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
7165 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
7166
7167 if (size != rounded_size)
7168 {
7169 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7170 plus_constant (Pmode, stack_pointer_rtx,
7171 rounded_size - size)));
7172 emit_stack_probe (stack_pointer_rtx);
7173 }
7174
7175 /* Adjust back to account for the additional first interval. */
7176 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7177 plus_constant (Pmode, stack_pointer_rtx,
7178 (get_probe_interval ()
7179 + dope))));
7180
7181 /* This does not deallocate the space reserved for the scratch
7182 register. That will be deallocated in the epilogue. */
7183 release_scratch_register_on_entry (&sr, size, false);
7184 }
7185
7186 /* Even if the stack pointer isn't the CFA register, we need to correctly
7187 describe the adjustments made to it, in particular differentiate the
7188 frame-related ones from the frame-unrelated ones. */
7189 if (size > 0)
7190 {
7191 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
7192 XVECEXP (expr, 0, 0)
7193 = gen_rtx_SET (stack_pointer_rtx,
7194 plus_constant (Pmode, stack_pointer_rtx, -size));
7195 XVECEXP (expr, 0, 1)
7196 = gen_rtx_SET (stack_pointer_rtx,
7197 plus_constant (Pmode, stack_pointer_rtx,
7198 get_probe_interval () + dope + size));
7199 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
7200 RTX_FRAME_RELATED_P (last) = 1;
7201
7202 cfun->machine->fs.sp_offset += size;
7203 }
7204
7205 /* Make sure nothing is scheduled before we are done. */
7206 emit_insn (gen_blockage ());
7207 }
7208
7209 /* Adjust the stack pointer up to REG while probing it. */
7210
7211 const char *
7212 output_adjust_stack_and_probe (rtx reg)
7213 {
7214 static int labelno = 0;
7215 char loop_lab[32];
7216 rtx xops[2];
7217
7218 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7219
7220 /* Loop. */
7221 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7222
7223 /* SP = SP + PROBE_INTERVAL. */
7224 xops[0] = stack_pointer_rtx;
7225 xops[1] = GEN_INT (get_probe_interval ());
7226 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7227
7228 /* Probe at SP. */
7229 xops[1] = const0_rtx;
7230 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7231
7232 /* Test if SP == LAST_ADDR. */
7233 xops[0] = stack_pointer_rtx;
7234 xops[1] = reg;
7235 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7236
7237 /* Branch. */
7238 fputs ("\tjne\t", asm_out_file);
7239 assemble_name_raw (asm_out_file, loop_lab);
7240 fputc ('\n', asm_out_file);
7241
7242 return "";
7243 }
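
/* With the default 4096-byte probe interval, the loop emitted above looks
   roughly like this in 64-bit AT&T syntax (the label name and the scratch
   register are illustrative only):

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%r11, %rsp
		jne	.LPSRL0  */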
7244
7245 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7246 inclusive. These are offsets from the current stack pointer.
7247
7248 INT_REGISTERS_SAVED is true if integer registers have already been
7249 pushed on the stack. */
7250
7251 static void
7252 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7253 const bool int_registers_saved)
7254 {
7255 /* See if we have a constant small number of probes to generate. If so,
7256 that's the easy case. The run-time loop is made up of 6 insns in the
7257 generic case while the compile-time loop is made up of n insns for n #
7258 of intervals. */
7259 if (size <= 6 * get_probe_interval ())
7260 {
7261 HOST_WIDE_INT i;
7262
7263 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7264 it exceeds SIZE. If only one probe is needed, this will not
7265 generate any code. Then probe at FIRST + SIZE. */
7266 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7267 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7268 -(first + i)));
7269
7270 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7271 -(first + size)));
7272 }
7273
7274 /* Otherwise, do the same as above, but in a loop. Note that we must be
7275 extra careful with variables wrapping around because we might be at
7276 the very top (or the very bottom) of the address space and we have
7277 to be able to handle this case properly; in particular, we use an
7278 equality test for the loop condition. */
7279 else
7280 {
7281 /* We expect the GP registers to be saved when probes are used
7282 as the probing sequences might need a scratch register and
7283 the routine to allocate one assumes the integer registers
7284 have already been saved. */
7285 gcc_assert (int_registers_saved);
7286
7287 HOST_WIDE_INT rounded_size, last;
7288 struct scratch_reg sr;
7289
7290 get_scratch_register_on_entry (&sr);
7291
7292
7293 /* Step 1: round SIZE to the previous multiple of the interval. */
7294
7295 rounded_size = ROUND_DOWN (size, get_probe_interval ());
7296
7297
7298 /* Step 2: compute initial and final value of the loop counter. */
7299
7300 /* TEST_OFFSET = FIRST. */
7301 emit_move_insn (sr.reg, GEN_INT (-first));
7302
7303 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7304 last = first + rounded_size;
7305
7306
7307 /* Step 3: the loop
7308
7309 do
7310 {
7311 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7312 probe at TEST_ADDR
7313 }
7314 while (TEST_ADDR != LAST_ADDR)
7315
7316 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7317 until it is equal to ROUNDED_SIZE. */
7318
7319 emit_insn
7320 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
7321
7322
7323 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7324 that SIZE is equal to ROUNDED_SIZE. */
7325
7326 if (size != rounded_size)
7327 emit_stack_probe (plus_constant (Pmode,
7328 gen_rtx_PLUS (Pmode,
7329 stack_pointer_rtx,
7330 sr.reg),
7331 rounded_size - size));
7332
7333 release_scratch_register_on_entry (&sr, size, true);
7334 }
7335
7336 /* Make sure nothing is scheduled before we are done. */
7337 emit_insn (gen_blockage ());
7338 }
7339
7340 /* Probe a range of stack addresses from REG to END, inclusive. These are
7341 offsets from the current stack pointer. */
7342
7343 const char *
7344 output_probe_stack_range (rtx reg, rtx end)
7345 {
7346 static int labelno = 0;
7347 char loop_lab[32];
7348 rtx xops[3];
7349
7350 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7351
7352 /* Loop. */
7353 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7354
7355 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
7356 xops[0] = reg;
7357 xops[1] = GEN_INT (get_probe_interval ());
7358 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7359
7360 /* Probe at TEST_ADDR. */
7361 xops[0] = stack_pointer_rtx;
7362 xops[1] = reg;
7363 xops[2] = const0_rtx;
7364 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
7365
7366 /* Test if TEST_ADDR == LAST_ADDR. */
7367 xops[0] = reg;
7368 xops[1] = end;
7369 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7370
7371 /* Branch. */
7372 fputs ("\tjne\t", asm_out_file);
7373 assemble_name_raw (asm_out_file, loop_lab);
7374 fputc ('\n', asm_out_file);
7375
7376 return "";
7377 }
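/* An illustrative expansion of the loop above on x86-64, assuming a
   4096-byte probe interval, %r11 as REG and a constant END (register,
   interval, END value and label name are all hypothetical):

	.LPSRL0:
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		cmpq	$-65536, %r11
		jne	.LPSRL0

   REG walks downward in probe-interval steps, each iteration touching the
   page at SP + REG, until REG reaches END.  */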
7378
7379 /* Set stack_frame_required to false if stack frame isn't required.
7380 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
7381 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
7382
7383 static void
7384 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
7385 bool check_stack_slot)
7386 {
7387 HARD_REG_SET set_up_by_prologue, prologue_used;
7388 basic_block bb;
7389
7390 CLEAR_HARD_REG_SET (prologue_used);
7391 CLEAR_HARD_REG_SET (set_up_by_prologue);
7392 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
7393 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
7394 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
7395 HARD_FRAME_POINTER_REGNUM);
7396
7397 /* The preferred stack alignment is the minimum stack alignment. */
7398 if (stack_alignment > crtl->preferred_stack_boundary)
7399 stack_alignment = crtl->preferred_stack_boundary;
7400
7401 bool require_stack_frame = false;
7402
7403 FOR_EACH_BB_FN (bb, cfun)
7404 {
7405 rtx_insn *insn;
7406 FOR_BB_INSNS (bb, insn)
7407 if (NONDEBUG_INSN_P (insn)
7408 && requires_stack_frame_p (insn, prologue_used,
7409 set_up_by_prologue))
7410 {
7411 require_stack_frame = true;
7412
7413 if (check_stack_slot)
7414 {
7415 /* Find the maximum stack alignment. */
7416 subrtx_iterator::array_type array;
7417 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
7418 if (MEM_P (*iter)
7419 && (reg_mentioned_p (stack_pointer_rtx,
7420 *iter)
7421 || reg_mentioned_p (frame_pointer_rtx,
7422 *iter)))
7423 {
7424 unsigned int alignment = MEM_ALIGN (*iter);
7425 if (alignment > stack_alignment)
7426 stack_alignment = alignment;
7427 }
7428 }
7429 }
7430 }
7431
7432 cfun->machine->stack_frame_required = require_stack_frame;
7433 }
7434
7435 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7436 will guide prologue/epilogue to be generated in correct form. */
7437
7438 static void
7439 ix86_finalize_stack_frame_flags (void)
7440 {
7441 /* Check if stack realignment is really needed after reload, and
7442 store the result in cfun. */
7443 unsigned int incoming_stack_boundary
7444 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7445 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7446 unsigned int stack_alignment
7447 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
7448 ? crtl->max_used_stack_slot_alignment
7449 : crtl->stack_alignment_needed);
7450 unsigned int stack_realign
7451 = (incoming_stack_boundary < stack_alignment);
7452 bool recompute_frame_layout_p = false;
7453
7454 if (crtl->stack_realign_finalized)
7455 {
7456 /* After stack_realign_needed is finalized, we can no longer
7457 change it. */
7458 gcc_assert (crtl->stack_realign_needed == stack_realign);
7459 return;
7460 }
7461
7462 /* It is always safe to compute max_used_stack_alignment. We
7463 compute it only if 128-bit aligned load/store may be generated
7464 on misaligned stack slot which will lead to segfault. */
7465 bool check_stack_slot
7466 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
7467 ix86_find_max_used_stack_alignment (stack_alignment,
7468 check_stack_slot);
7469
7470 /* If the only reason for frame_pointer_needed is that we conservatively
7471 assumed stack realignment might be needed or -fno-omit-frame-pointer
7472 is used, but in the end nothing that needed the stack alignment had
7473 been spilled nor stack access, clear frame_pointer_needed and say we
7474 don't need stack realignment. */
7475 if ((stack_realign || (!flag_omit_frame_pointer && optimize))
7476 && frame_pointer_needed
7477 && crtl->is_leaf
7478 && crtl->sp_is_unchanging
7479 && !ix86_current_function_calls_tls_descriptor
7480 && !crtl->accesses_prior_frames
7481 && !cfun->calls_alloca
7482 && !crtl->calls_eh_return
7483 /* See ira_setup_eliminable_regset for the rationale. */
7484 && !(STACK_CHECK_MOVING_SP
7485 && flag_stack_check
7486 && flag_exceptions
7487 && cfun->can_throw_non_call_exceptions)
7488 && !ix86_frame_pointer_required ()
7489 && ix86_get_frame_size () == 0
7490 && ix86_nsaved_sseregs () == 0
7491 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
7492 {
7493 if (cfun->machine->stack_frame_required)
7494 {
7495 /* Stack frame is required. If stack alignment needed is less
7496 than incoming stack boundary, don't realign stack. */
7497 stack_realign = incoming_stack_boundary < stack_alignment;
7498 if (!stack_realign)
7499 {
7500 crtl->max_used_stack_slot_alignment
7501 = incoming_stack_boundary;
7502 crtl->stack_alignment_needed
7503 = incoming_stack_boundary;
7504 /* Also update preferred_stack_boundary for leaf
7505 functions. */
7506 crtl->preferred_stack_boundary
7507 = incoming_stack_boundary;
7508 }
7509 }
7510 else
7511 {
7512 /* If drap has been set, but it actually isn't live at the
7513 start of the function, there is no reason to set it up. */
7514 if (crtl->drap_reg)
7515 {
7516 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7517 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
7518 REGNO (crtl->drap_reg)))
7519 {
7520 crtl->drap_reg = NULL_RTX;
7521 crtl->need_drap = false;
7522 }
7523 }
7524 else
7525 cfun->machine->no_drap_save_restore = true;
7526
7527 frame_pointer_needed = false;
7528 stack_realign = false;
7529 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
7530 crtl->stack_alignment_needed = incoming_stack_boundary;
7531 crtl->stack_alignment_estimated = incoming_stack_boundary;
7532 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
7533 crtl->preferred_stack_boundary = incoming_stack_boundary;
7534 df_finish_pass (true);
7535 df_scan_alloc (NULL);
7536 df_scan_blocks ();
7537 df_compute_regs_ever_live (true);
7538 df_analyze ();
7539
7540 if (flag_var_tracking)
7541 {
7542 /* Since frame pointer is no longer available, replace it with
7543 stack pointer - UNITS_PER_WORD in debug insns. */
7544 df_ref ref, next;
7545 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
7546 ref; ref = next)
7547 {
7548 next = DF_REF_NEXT_REG (ref);
7549 if (!DF_REF_INSN_INFO (ref))
7550 continue;
7551
7552 /* Make sure the next ref is for a different instruction,
7553 so that we're not affected by the rescan. */
7554 rtx_insn *insn = DF_REF_INSN (ref);
7555 while (next && DF_REF_INSN (next) == insn)
7556 next = DF_REF_NEXT_REG (next);
7557
7558 if (DEBUG_INSN_P (insn))
7559 {
7560 bool changed = false;
7561 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
7562 {
7563 rtx *loc = DF_REF_LOC (ref);
7564 if (*loc == hard_frame_pointer_rtx)
7565 {
7566 *loc = plus_constant (Pmode,
7567 stack_pointer_rtx,
7568 -UNITS_PER_WORD);
7569 changed = true;
7570 }
7571 }
7572 if (changed)
7573 df_insn_rescan (insn);
7574 }
7575 }
7576 }
7577
7578 recompute_frame_layout_p = true;
7579 }
7580 }
7581 else if (crtl->max_used_stack_slot_alignment >= 128
7582 && cfun->machine->stack_frame_required)
7583 {
7584 /* We don't need to realign the stack. max_used_stack_alignment is
7585 used to decide how the stack frame should be aligned. This is
7586 independent of any psABI and of 32-bit vs 64-bit. */
7587 cfun->machine->max_used_stack_alignment
7588 = stack_alignment / BITS_PER_UNIT;
7589 }
7590
7591 if (crtl->stack_realign_needed != stack_realign)
7592 recompute_frame_layout_p = true;
7593 crtl->stack_realign_needed = stack_realign;
7594 crtl->stack_realign_finalized = true;
7595 if (recompute_frame_layout_p)
7596 ix86_compute_frame_layout ();
7597 }
7598
7599 /* Delete SET_GOT right after entry block if it is allocated to reg. */
7600
7601 static void
7602 ix86_elim_entry_set_got (rtx reg)
7603 {
7604 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7605 rtx_insn *c_insn = BB_HEAD (bb);
7606 if (!NONDEBUG_INSN_P (c_insn))
7607 c_insn = next_nonnote_nondebug_insn (c_insn);
7608 if (c_insn && NONJUMP_INSN_P (c_insn))
7609 {
7610 rtx pat = PATTERN (c_insn);
7611 if (GET_CODE (pat) == PARALLEL)
7612 {
7613 rtx vec = XVECEXP (pat, 0, 0);
7614 if (GET_CODE (vec) == SET
7615 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
7616 && REGNO (XEXP (vec, 0)) == REGNO (reg))
7617 delete_insn (c_insn);
7618 }
7619 }
7620 }
7621
7622 static rtx
7623 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
7624 {
7625 rtx addr, mem;
7626
7627 if (offset)
7628 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
7629 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
7630 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
7631 }
7632
7633 static inline rtx
7634 gen_frame_load (rtx reg, rtx frame_reg, int offset)
7635 {
7636 return gen_frame_set (reg, frame_reg, offset, false);
7637 }
7638
7639 static inline rtx
7640 gen_frame_store (rtx reg, rtx frame_reg, int offset)
7641 {
7642 return gen_frame_set (reg, frame_reg, offset, true);
7643 }
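/* For illustration (register and offset hypothetical), on a 64-bit target
   gen_frame_store (reg, rax, -16) produces the RTL

       (set (mem:DI (plus:DI (reg:DI ax) (const_int -16))) (reg:DI ...))

   while gen_frame_load swaps source and destination.  A zero offset simply
   uses FRAME_REG itself as the address.  */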
7644
7645 static void
7646 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
7647 {
7648 struct machine_function *m = cfun->machine;
7649 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
7650 + m->call_ms2sysv_extra_regs;
7651 rtvec v = rtvec_alloc (ncregs + 1);
7652 unsigned int align, i, vi = 0;
7653 rtx_insn *insn;
7654 rtx sym, addr;
7655 rtx rax = gen_rtx_REG (word_mode, AX_REG);
7656 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
7657
7658 /* AL should only be live with sysv_abi. */
7659 gcc_assert (!ix86_eax_live_at_start_p ());
7660 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
7661
7662 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
7663 whether we've actually realigned the stack or not. */
7664 align = GET_MODE_ALIGNMENT (V4SFmode);
7665 addr = choose_baseaddr (frame.stack_realign_offset
7666 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
7667 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
7668
7669 emit_insn (gen_rtx_SET (rax, addr));
7670
7671 /* Get the stub symbol. */
7672 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
7673 : XLOGUE_STUB_SAVE);
7674 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
7675
7676 for (i = 0; i < ncregs; ++i)
7677 {
7678 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
7679 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
7680 r.regno);
7681 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
7682 }
7683
7684 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
7685
7686 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
7687 RTX_FRAME_RELATED_P (insn) = true;
7688 }
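/* Schematically, the insn emitted above is a single frame-related PARALLEL
   of the form

       (parallel [(use (symbol_ref <save stub>))
                  (set (mem (plus (reg ax) (const_int -off_0))) (reg r_0))
                  ...
                  (set (mem (plus (reg ax) (const_int -off_n))) (reg r_n))])

   i.e. one USE of the out-of-line save stub plus one frame store per
   clobbered register, all addressed relative to the stub pointer in RAX.  */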
7689
7690 /* Generate and return an insn body to AND X with Y. */
7691
7692 static rtx_insn *
7693 gen_and2_insn (rtx x, rtx y)
7694 {
7695 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
7696
7697 gcc_assert (insn_operand_matches (icode, 0, x));
7698 gcc_assert (insn_operand_matches (icode, 1, x));
7699 gcc_assert (insn_operand_matches (icode, 2, y));
7700
7701 return GEN_FCN (icode) (x, x, y);
7702 }
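/* For example, gen_and2_insn (stack_pointer_rtx, GEN_INT (-16)) returns an
   insn equivalent to "and $-16, %rsp" (the -16 is illustrative); the
   prologue code below uses this to align the stack pointer to the required
   boundary.  */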
7703
7704 /* Expand the prologue into a bunch of separate insns. */
7705
7706 void
7707 ix86_expand_prologue (void)
7708 {
7709 struct machine_function *m = cfun->machine;
7710 rtx insn, t;
7711 HOST_WIDE_INT allocate;
7712 bool int_registers_saved;
7713 bool sse_registers_saved;
7714 bool save_stub_call_needed;
7715 rtx static_chain = NULL_RTX;
7716
7717 if (ix86_function_naked (current_function_decl))
7718 return;
7719
7720 ix86_finalize_stack_frame_flags ();
7721
7722 /* DRAP should not coexist with stack_realign_fp */
7723 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7724
7725 memset (&m->fs, 0, sizeof (m->fs));
7726
7727 /* Initialize CFA state for before the prologue. */
7728 m->fs.cfa_reg = stack_pointer_rtx;
7729 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
7730
7731 /* Track SP offset to the CFA. We continue tracking this after we've
7732 swapped the CFA register away from SP. In the case of re-alignment
7733 this is fudged; we're interested in offsets within the local frame. */
7734 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
7735 m->fs.sp_valid = true;
7736 m->fs.sp_realigned = false;
7737
7738 const struct ix86_frame &frame = cfun->machine->frame;
7739
7740 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
7741 {
7742 /* We should have already generated an error for any use of
7743 ms_hook on a nested function. */
7744 gcc_checking_assert (!ix86_static_chain_on_stack);
7745
7746 /* Check if profiling is active and whether we shall use the
7747 profiling-before-prologue variant. If so, issue a sorry. */
7748 if (crtl->profile && flag_fentry != 0)
7749 sorry ("%<ms_hook_prologue%> attribute is not compatible "
7750 "with %<-mfentry%> for 32-bit");
7751
7752 /* In ix86_asm_output_function_label we emitted:
7753 8b ff movl.s %edi,%edi
7754 55 push %ebp
7755 8b ec movl.s %esp,%ebp
7756
7757 This matches the hookable function prologue in Win32 API
7758 functions in Microsoft Windows XP Service Pack 2 and newer.
7759 Wine uses this to enable Windows apps to hook the Win32 API
7760 functions provided by Wine.
7761
7762 What that means is that we've already set up the frame pointer. */
7763
7764 if (frame_pointer_needed
7765 && !(crtl->drap_reg && crtl->stack_realign_needed))
7766 {
7767 rtx push, mov;
7768
7769 /* We've decided to use the frame pointer already set up.
7770 Describe this to the unwinder by pretending that both
7771 push and mov insns happen right here.
7772
7773 Putting the unwind info here at the end of the ms_hook
7774 is done so that we can make absolutely certain we get
7775 the required byte sequence at the start of the function,
7776 rather than relying on an assembler that can produce
7777 the exact encoding required.
7778
7779 However it does mean (in the unpatched case) that we have
7780 a 1 insn window where the asynchronous unwind info is
7781 incorrect. However, if we placed the unwind info at
7782 its correct location we would have incorrect unwind info
7783 in the patched case. Which is probably all moot since
7784 I don't expect Wine generates dwarf2 unwind info for the
7785 system libraries that use this feature. */
7786
7787 insn = emit_insn (gen_blockage ());
7788
7789 push = gen_push (hard_frame_pointer_rtx);
7790 mov = gen_rtx_SET (hard_frame_pointer_rtx,
7791 stack_pointer_rtx);
7792 RTX_FRAME_RELATED_P (push) = 1;
7793 RTX_FRAME_RELATED_P (mov) = 1;
7794
7795 RTX_FRAME_RELATED_P (insn) = 1;
7796 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7797 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
7798
7799 /* Note that gen_push incremented m->fs.cfa_offset, even
7800 though we didn't emit the push insn here. */
7801 m->fs.cfa_reg = hard_frame_pointer_rtx;
7802 m->fs.fp_offset = m->fs.cfa_offset;
7803 m->fs.fp_valid = true;
7804 }
7805 else
7806 {
7807 /* The frame pointer is not needed so pop %ebp again.
7808 This leaves us with a pristine state. */
7809 emit_insn (gen_pop (hard_frame_pointer_rtx));
7810 }
7811 }
7812
7813 /* The first insn of a function that accepts its static chain on the
7814 stack is to push the register that would be filled in by a direct
7815 call. This insn will be skipped by the trampoline. */
7816 else if (ix86_static_chain_on_stack)
7817 {
7818 static_chain = ix86_static_chain (cfun->decl, false);
7819 insn = emit_insn (gen_push (static_chain));
7820 emit_insn (gen_blockage ());
7821
7822 /* We don't want to interpret this push insn as a register save,
7823 only as a stack adjustment. The real copy of the register as
7824 a save will be done later, if needed. */
7825 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7826 t = gen_rtx_SET (stack_pointer_rtx, t);
7827 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
7828 RTX_FRAME_RELATED_P (insn) = 1;
7829 }
7830
7831 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
7832 DRAP is needed and stack realignment is really needed after reload. */
7833 if (stack_realign_drap)
7834 {
7835 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7836
7837 /* Can't use DRAP in interrupt function. */
7838 if (cfun->machine->func_type != TYPE_NORMAL)
7839 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
7840 "in interrupt service routine. This may be worked "
7841 "around by avoiding functions with aggregate return.");
7842
7843 /* Only need to push parameter pointer reg if it is caller saved. */
7844 if (!call_used_regs[REGNO (crtl->drap_reg)])
7845 {
7846 /* Push arg pointer reg */
7847 insn = emit_insn (gen_push (crtl->drap_reg));
7848 RTX_FRAME_RELATED_P (insn) = 1;
7849 }
7850
7851 /* Grab the argument pointer. */
7852 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
7853 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
7854 RTX_FRAME_RELATED_P (insn) = 1;
7855 m->fs.cfa_reg = crtl->drap_reg;
7856 m->fs.cfa_offset = 0;
7857
7858 /* Align the stack. */
7859 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
7860 GEN_INT (-align_bytes)));
7861 RTX_FRAME_RELATED_P (insn) = 1;
7862
7863 /* Replicate the return address on the stack so that return
7864 address can be reached via (argp - 1) slot. This is needed
7865 to implement macro RETURN_ADDR_RTX and intrinsic function
7866 expand_builtin_return_addr etc. */
7867 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
7868 t = gen_frame_mem (word_mode, t);
7869 insn = emit_insn (gen_push (t));
7870 RTX_FRAME_RELATED_P (insn) = 1;
7871
7872 /* For the purposes of frame and register save area addressing,
7873 we've started over with a new frame. */
7874 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
7875 m->fs.realigned = true;
7876
7877 if (static_chain)
7878 {
7879 /* Replicate static chain on the stack so that static chain
7880 can be reached via (argp - 2) slot. This is needed for
7881 nested function with stack realignment. */
7882 insn = emit_insn (gen_push (static_chain));
7883 RTX_FRAME_RELATED_P (insn) = 1;
7884 }
7885 }
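/* An illustrative DRAP prologue sketch (register, alignment and offsets are
   hypothetical), for a 64-bit function with %r10 as the DRAP register and a
   required alignment of 32 bytes:

	leaq	8(%rsp), %r10		# grab the argument pointer
	andq	$-32, %rsp		# align the stack
	pushq	-8(%r10)		# replicate the return address

   preceded by a push of the DRAP register when it is call-saved, and
   followed by a push of the static chain when one is present.  */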
7886
7887 int_registers_saved = (frame.nregs == 0);
7888 sse_registers_saved = (frame.nsseregs == 0);
7889 save_stub_call_needed = (m->call_ms2sysv);
7890 gcc_assert (sse_registers_saved || !save_stub_call_needed);
7891
7892 if (frame_pointer_needed && !m->fs.fp_valid)
7893 {
7894 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7895 slower on all targets. Also sdb didn't like it. */
7896 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7897 RTX_FRAME_RELATED_P (insn) = 1;
7898
7899 /* Push registers now, before setting the frame pointer
7900 on SEH target. */
7901 if (!int_registers_saved
7902 && TARGET_SEH
7903 && !frame.save_regs_using_mov)
7904 {
7905 ix86_emit_save_regs ();
7906 int_registers_saved = true;
7907 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
7908 }
7909
7910 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
7911 {
7912 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7913 RTX_FRAME_RELATED_P (insn) = 1;
7914
7915 if (m->fs.cfa_reg == stack_pointer_rtx)
7916 m->fs.cfa_reg = hard_frame_pointer_rtx;
7917 m->fs.fp_offset = m->fs.sp_offset;
7918 m->fs.fp_valid = true;
7919 }
7920 }
7921
7922 if (!int_registers_saved)
7923 {
7924 /* If saving registers via PUSH, do so now. */
7925 if (!frame.save_regs_using_mov)
7926 {
7927 ix86_emit_save_regs ();
7928 int_registers_saved = true;
7929 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
7930 }
7931
7932 /* When using red zone we may start register saving before allocating
7933 the stack frame saving one cycle of the prologue. However, avoid
7934 doing this if we have to probe the stack; at least on x86_64 the
7935 stack probe can turn into a call that clobbers a red zone location. */
7936 else if (ix86_using_red_zone ()
7937 && (! TARGET_STACK_PROBE
7938 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
7939 {
7940 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
7941 int_registers_saved = true;
7942 }
7943 }
7944
7945 if (stack_realign_fp)
7946 {
7947 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7948 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
7949
7950 /* Record last valid frame pointer offset. */
7951 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
7952
7953 /* The computation of the size of the re-aligned stack frame means
7954 that we must allocate the size of the register save area before
7955 performing the actual alignment. Otherwise we cannot guarantee
7956 that there's enough storage above the realignment point. */
7957 allocate = frame.reg_save_offset - m->fs.sp_offset
7958 + frame.stack_realign_allocate;
7959 if (allocate)
7960 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7961 GEN_INT (-allocate), -1, false);
7962
7963 /* Align the stack. */
7964 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
7965 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
7966 m->fs.sp_realigned_offset = m->fs.sp_offset
7967 - frame.stack_realign_allocate;
7968 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
7969 Beyond this point, stack access should be done via choose_baseaddr or
7970 by using sp_valid_at and fp_valid_at to determine the correct base
7971 register. Henceforth, any CFA offset should be thought of as logical
7972 and not physical. */
7973 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
7974 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
7975 m->fs.sp_realigned = true;
7976
7977 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
7978 is needed to describe where a register is saved using a realigned
7979 stack pointer, so we need to invalidate the stack pointer for that
7980 target. */
7981 if (TARGET_SEH)
7982 m->fs.sp_valid = false;
7983
7984 /* If SP offset is non-immediate after allocation of the stack frame,
7985 then emit SSE saves or stub call prior to allocating the rest of the
7986 stack frame. This is less efficient for the out-of-line stub because
7987 we can't combine allocations across the call barrier, but it's better
7988 than using a scratch register. */
7989 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
7990 - m->fs.sp_realigned_offset),
7991 Pmode))
7992 {
7993 if (!sse_registers_saved)
7994 {
7995 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
7996 sse_registers_saved = true;
7997 }
7998 else if (save_stub_call_needed)
7999 {
8000 ix86_emit_outlined_ms2sysv_save (frame);
8001 save_stub_call_needed = false;
8002 }
8003 }
8004 }
8005
8006 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8007
8008 if (flag_stack_usage_info)
8009 {
8010 /* We start to count from ARG_POINTER. */
8011 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8012
8013 /* If it was realigned, take into account the fake frame. */
8014 if (stack_realign_drap)
8015 {
8016 if (ix86_static_chain_on_stack)
8017 stack_size += UNITS_PER_WORD;
8018
8019 if (!call_used_regs[REGNO (crtl->drap_reg)])
8020 stack_size += UNITS_PER_WORD;
8021
8022 /* This over-estimates by 1 minimal-stack-alignment-unit but
8023 mitigates that by counting in the new return address slot. */
8024 current_function_dynamic_stack_size
8025 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8026 }
8027
8028 current_function_static_stack_size = stack_size;
8029 }
8030
8031 /* On SEH target with very large frame size, allocate an area to save
8032 SSE registers (as the very large allocation won't be described). */
8033 if (TARGET_SEH
8034 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8035 && !sse_registers_saved)
8036 {
8037 HOST_WIDE_INT sse_size
8038 = frame.sse_reg_save_offset - frame.reg_save_offset;
8039
8040 gcc_assert (int_registers_saved);
8041
8042 /* No need to do stack checking as the area will be immediately
8043 written. */
8044 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8045 GEN_INT (-sse_size), -1,
8046 m->fs.cfa_reg == stack_pointer_rtx);
8047 allocate -= sse_size;
8048 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8049 sse_registers_saved = true;
8050 }
8051
8052 /* The stack has already been decremented by the instruction calling us
8053 so probe if the size is non-negative to preserve the protection area. */
8054 if (allocate >= 0
8055 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
8056 || flag_stack_clash_protection))
8057 {
8058 if (flag_stack_clash_protection)
8059 {
8060 ix86_adjust_stack_and_probe_stack_clash (allocate,
8061 int_registers_saved);
8062 allocate = 0;
8063 }
8064 else if (STACK_CHECK_MOVING_SP)
8065 {
8066 if (!(crtl->is_leaf && !cfun->calls_alloca
8067 && allocate <= get_probe_interval ()))
8068 {
8069 ix86_adjust_stack_and_probe (allocate, int_registers_saved);
8070 allocate = 0;
8071 }
8072 }
8073 else
8074 {
8075 HOST_WIDE_INT size = allocate;
8076
8077 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8078 size = 0x80000000 - get_stack_check_protect () - 1;
8079
8080 if (TARGET_STACK_PROBE)
8081 {
8082 if (crtl->is_leaf && !cfun->calls_alloca)
8083 {
8084 if (size > get_probe_interval ())
8085 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8086 }
8087 else
8088 ix86_emit_probe_stack_range (0,
8089 size + get_stack_check_protect (),
8090 int_registers_saved);
8091 }
8092 else
8093 {
8094 if (crtl->is_leaf && !cfun->calls_alloca)
8095 {
8096 if (size > get_probe_interval ()
8097 && size > get_stack_check_protect ())
8098 ix86_emit_probe_stack_range (get_stack_check_protect (),
8099 (size
8100 - get_stack_check_protect ()),
8101 int_registers_saved);
8102 }
8103 else
8104 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8105 int_registers_saved);
8106 }
8107 }
8108 }
8109
8110 if (allocate == 0)
8111 ;
8112 else if (!ix86_target_stack_probe ()
8113 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8114 {
8115 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8116 GEN_INT (-allocate), -1,
8117 m->fs.cfa_reg == stack_pointer_rtx);
8118 }
8119 else
8120 {
8121 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8122 rtx r10 = NULL;
8123 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8124 bool eax_live = ix86_eax_live_at_start_p ();
8125 bool r10_live = false;
8126
8127 if (TARGET_64BIT)
8128 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8129
8130 if (eax_live)
8131 {
8132 insn = emit_insn (gen_push (eax));
8133 allocate -= UNITS_PER_WORD;
8134 /* Note that SEH directives need to continue tracking the stack
8135 pointer even after the frame pointer has been set up. */
8136 if (sp_is_cfa_reg || TARGET_SEH)
8137 {
8138 if (sp_is_cfa_reg)
8139 m->fs.cfa_offset += UNITS_PER_WORD;
8140 RTX_FRAME_RELATED_P (insn) = 1;
8141 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8142 gen_rtx_SET (stack_pointer_rtx,
8143 plus_constant (Pmode,
8144 stack_pointer_rtx,
8145 -UNITS_PER_WORD)));
8146 }
8147 }
8148
8149 if (r10_live)
8150 {
8151 r10 = gen_rtx_REG (Pmode, R10_REG);
8152 insn = emit_insn (gen_push (r10));
8153 allocate -= UNITS_PER_WORD;
8154 if (sp_is_cfa_reg || TARGET_SEH)
8155 {
8156 if (sp_is_cfa_reg)
8157 m->fs.cfa_offset += UNITS_PER_WORD;
8158 RTX_FRAME_RELATED_P (insn) = 1;
8159 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8160 gen_rtx_SET (stack_pointer_rtx,
8161 plus_constant (Pmode,
8162 stack_pointer_rtx,
8163 -UNITS_PER_WORD)));
8164 }
8165 }
8166
8167 emit_move_insn (eax, GEN_INT (allocate));
8168 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8169
8170 /* Use the fact that AX still contains ALLOCATE. */
8171 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8172 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8173
8174 if (sp_is_cfa_reg || TARGET_SEH)
8175 {
8176 if (sp_is_cfa_reg)
8177 m->fs.cfa_offset += allocate;
8178 RTX_FRAME_RELATED_P (insn) = 1;
8179 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8180 gen_rtx_SET (stack_pointer_rtx,
8181 plus_constant (Pmode, stack_pointer_rtx,
8182 -allocate)));
8183 }
8184 m->fs.sp_offset += allocate;
8185
8186 /* Use stack_pointer_rtx for relative addressing so that code works for
8187 realigned stack. But this means that we need a blockage to prevent
8188 stores based on the frame pointer from being scheduled before. */
8189 if (r10_live && eax_live)
8190 {
8191 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8192 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8193 gen_frame_mem (word_mode, t));
8194 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8195 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8196 gen_frame_mem (word_mode, t));
8197 emit_insn (gen_memory_blockage ());
8198 }
8199 else if (eax_live || r10_live)
8200 {
8201 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8202 emit_move_insn (gen_rtx_REG (word_mode,
8203 (eax_live ? AX_REG : R10_REG)),
8204 gen_frame_mem (word_mode, t));
8205 emit_insn (gen_memory_blockage ());
8206 }
8207 }
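/* A rough sketch of the sequence emitted above when %rax is live at entry
   and a probed allocation is required (sizes and the probe worker are
   target-dependent and illustrative):

	pushq	%rax			# preserve the live %rax
	movq	$ALLOCATE, %rax
	call	<stack probe worker>	# e.g. ___chkstk_ms on mingw targets
	subq	%rax, %rsp		# %rax still holds ALLOCATE
	movq	(%rsp,%rax), %rax	# reload %rax from its save slot
*/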
8208 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8209
8210 /* If we haven't already set up the frame pointer, do so now. */
8211 if (frame_pointer_needed && !m->fs.fp_valid)
8212 {
8213 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8214 GEN_INT (frame.stack_pointer_offset
8215 - frame.hard_frame_pointer_offset));
8216 insn = emit_insn (insn);
8217 RTX_FRAME_RELATED_P (insn) = 1;
8218 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8219
8220 if (m->fs.cfa_reg == stack_pointer_rtx)
8221 m->fs.cfa_reg = hard_frame_pointer_rtx;
8222 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8223 m->fs.fp_valid = true;
8224 }
8225
8226 if (!int_registers_saved)
8227 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8228 if (!sse_registers_saved)
8229 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8230 else if (save_stub_call_needed)
8231 ix86_emit_outlined_ms2sysv_save (frame);
8232
8233 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8234 in PROLOGUE. */
8235 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8236 {
8237 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8238 insn = emit_insn (gen_set_got (pic));
8239 RTX_FRAME_RELATED_P (insn) = 1;
8240 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8241 emit_insn (gen_prologue_use (pic));
8242 /* Delete an already emitted SET_GOT if it exists and is allocated to
8243 REAL_PIC_OFFSET_TABLE_REGNUM. */
8244 ix86_elim_entry_set_got (pic);
8245 }
8246
8247 if (crtl->drap_reg && !crtl->stack_realign_needed)
8248 {
8249 /* vDRAP is set up, but after reload it turns out stack realignment
8250 isn't necessary; here we emit the prologue to set up DRAP
8251 without the stack realignment adjustment. */
8252 t = choose_baseaddr (0, NULL);
8253 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8254 }
8255
8256 /* Prevent instructions from being scheduled into register save push
8257 sequence when access to the redzone area is done through frame pointer.
8258 The offset between the frame pointer and the stack pointer is calculated
8259 relative to the value of the stack pointer at the end of the function
8260 prologue, and moving instructions that access redzone area via frame
8261 pointer inside push sequence violates this assumption. */
8262 if (frame_pointer_needed && frame.red_zone_size)
8263 emit_insn (gen_memory_blockage ());
8264
8265 /* SEH requires that the prologue end within 256 bytes of the start of
8266 the function. Prevent instruction schedules that would extend that.
8267 Further, prevent alloca modifications to the stack pointer from being
8268 combined with prologue modifications. */
8269 if (TARGET_SEH)
8270 emit_insn (gen_prologue_use (stack_pointer_rtx));
8271 }
8272
8273 /* Emit code to restore REG using a POP insn. */
8274
8275 static void
8276 ix86_emit_restore_reg_using_pop (rtx reg)
8277 {
8278 struct machine_function *m = cfun->machine;
8279 rtx_insn *insn = emit_insn (gen_pop (reg));
8280
8281 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8282 m->fs.sp_offset -= UNITS_PER_WORD;
8283
8284 if (m->fs.cfa_reg == crtl->drap_reg
8285 && REGNO (reg) == REGNO (crtl->drap_reg))
8286 {
8287 /* Previously we'd represented the CFA as an expression
8288 like *(%ebp - 8). We've just popped that value from
8289 the stack, which means we need to reset the CFA to
8290 the drap register. This will remain until we restore
8291 the stack pointer. */
8292 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8293 RTX_FRAME_RELATED_P (insn) = 1;
8294
8295 /* This means that the DRAP register is valid for addressing too. */
8296 m->fs.drap_valid = true;
8297 return;
8298 }
8299
8300 if (m->fs.cfa_reg == stack_pointer_rtx)
8301 {
8302 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8303 x = gen_rtx_SET (stack_pointer_rtx, x);
8304 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8305 RTX_FRAME_RELATED_P (insn) = 1;
8306
8307 m->fs.cfa_offset -= UNITS_PER_WORD;
8308 }
8309
8310 /* When the frame pointer is the CFA, and we pop it, we are
8311 swapping back to the stack pointer as the CFA. This happens
8312 for stack frames that don't allocate other data, so we assume
8313 the stack pointer is now pointing at the return address, i.e.
8314 the function entry state, which makes the offset be 1 word. */
8315 if (reg == hard_frame_pointer_rtx)
8316 {
8317 m->fs.fp_valid = false;
8318 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8319 {
8320 m->fs.cfa_reg = stack_pointer_rtx;
8321 m->fs.cfa_offset -= UNITS_PER_WORD;
8322
8323 add_reg_note (insn, REG_CFA_DEF_CFA,
8324 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8325 GEN_INT (m->fs.cfa_offset)));
8326 RTX_FRAME_RELATED_P (insn) = 1;
8327 }
8328 }
8329 }
8330
8331 /* Emit code to restore saved registers using POP insns. */
8332
8333 static void
8334 ix86_emit_restore_regs_using_pop (void)
8335 {
8336 unsigned int regno;
8337
8338 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8339 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
8340 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
8341 }
8342
8343 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
8344 omits the emit and only attaches the notes. */
8345
8346 static void
8347 ix86_emit_leave (rtx_insn *insn)
8348 {
8349 struct machine_function *m = cfun->machine;
8350
8351 if (!insn)
8352 insn = emit_insn (gen_leave (word_mode));
8353
8354 ix86_add_queued_cfa_restore_notes (insn);
8355
8356 gcc_assert (m->fs.fp_valid);
8357 m->fs.sp_valid = true;
8358 m->fs.sp_realigned = false;
8359 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
8360 m->fs.fp_valid = false;
8361
8362 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8363 {
8364 m->fs.cfa_reg = stack_pointer_rtx;
8365 m->fs.cfa_offset = m->fs.sp_offset;
8366
8367 add_reg_note (insn, REG_CFA_DEF_CFA,
8368 plus_constant (Pmode, stack_pointer_rtx,
8369 m->fs.sp_offset));
8370 RTX_FRAME_RELATED_P (insn) = 1;
8371 }
8372 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
8373 m->fs.fp_offset);
8374 }
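/* The "leave" instruction emitted above is equivalent to

	mov	%rbp, %rsp
	pop	%rbp

   which is why the frame state is updated so that SP becomes valid again at
   fp_offset - UNITS_PER_WORD and the frame pointer ceases to be valid.  */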
8375
8376 /* Emit code to restore saved registers using MOV insns.
8377 First register is restored from CFA - CFA_OFFSET. */
8378 static void
8379 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
8380 bool maybe_eh_return)
8381 {
8382 struct machine_function *m = cfun->machine;
8383 unsigned int regno;
8384
8385 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8386 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8387 {
8388 rtx reg = gen_rtx_REG (word_mode, regno);
8389 rtx mem;
8390 rtx_insn *insn;
8391
8392 mem = choose_baseaddr (cfa_offset, NULL);
8393 mem = gen_frame_mem (word_mode, mem);
8394 insn = emit_move_insn (reg, mem);
8395
8396 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8397 {
8398 /* Previously we'd represented the CFA as an expression
8399 like *(%ebp - 8). We've just popped that value from
8400 the stack, which means we need to reset the CFA to
8401 the drap register. This will remain until we restore
8402 the stack pointer. */
8403 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8404 RTX_FRAME_RELATED_P (insn) = 1;
8405
8406 /* This means that the DRAP register is valid for addressing. */
8407 m->fs.drap_valid = true;
8408 }
8409 else
8410 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8411
8412 cfa_offset -= UNITS_PER_WORD;
8413 }
8414 }
8415
8416 /* Emit code to restore saved registers using MOV insns.
8417 First register is restored from CFA - CFA_OFFSET. */
8418 static void
8419 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
8420 bool maybe_eh_return)
8421 {
8422 unsigned int regno;
8423
8424 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8425 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8426 {
8427 rtx reg = gen_rtx_REG (V4SFmode, regno);
8428 rtx mem;
8429 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
8430
8431 mem = choose_baseaddr (cfa_offset, &align);
8432 mem = gen_rtx_MEM (V4SFmode, mem);
8433
8434 /* The location alignment depends upon the base register. */
8435 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
8436 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
8437 set_mem_align (mem, align);
8438 emit_insn (gen_rtx_SET (reg, mem));
8439
8440 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8441
8442 cfa_offset -= GET_MODE_SIZE (V4SFmode);
8443 }
8444 }
8445
8446 static void
8447 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
8448 bool use_call, int style)
8449 {
8450 struct machine_function *m = cfun->machine;
8451 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8452 + m->call_ms2sysv_extra_regs;
8453 rtvec v;
8454 unsigned int elems_needed, align, i, vi = 0;
8455 rtx_insn *insn;
8456 rtx sym, tmp;
8457 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
8458 rtx r10 = NULL_RTX;
8459 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8460 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
8461 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
8462 rtx rsi_frame_load = NULL_RTX;
8463 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
8464 enum xlogue_stub stub;
8465
8466 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
8467
8468 /* If using a realigned stack, we should never start with padding. */
8469 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
8470
8471 /* Setup RSI as the stub's base pointer. */
8472 align = GET_MODE_ALIGNMENT (V4SFmode);
8473 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
8474 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8475
8476 emit_insn (gen_rtx_SET (rsi, tmp));
8477
8478 /* Get a symbol for the stub. */
8479 if (frame_pointer_needed)
8480 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
8481 : XLOGUE_STUB_RESTORE_HFP_TAIL;
8482 else
8483 stub = use_call ? XLOGUE_STUB_RESTORE
8484 : XLOGUE_STUB_RESTORE_TAIL;
8485 sym = xlogue.get_stub_rtx (stub);
8486
8487 elems_needed = ncregs;
8488 if (use_call)
8489 elems_needed += 1;
8490 else
8491 elems_needed += frame_pointer_needed ? 5 : 3;
8492 v = rtvec_alloc (elems_needed);
8493
8494 /* We call the epilogue stub when we need to pop incoming args or we are
8495 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
8496 epilogue stub and it is the tail-call. */
8497 if (use_call)
8498 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8499 else
8500 {
8501 RTVEC_ELT (v, vi++) = ret_rtx;
8502 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8503 if (frame_pointer_needed)
8504 {
8505 rtx rbp = gen_rtx_REG (DImode, BP_REG);
8506 gcc_assert (m->fs.fp_valid);
8507 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
8508
8509 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
8510 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
8511 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
8512 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8513 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
8514 }
8515 else
8516 {
8517 /* If no hard frame pointer, we set R10 to the SP restore value. */
8518 gcc_assert (!m->fs.fp_valid);
8519 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8520 gcc_assert (m->fs.sp_valid);
8521
8522 r10 = gen_rtx_REG (DImode, R10_REG);
8523 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
8524 emit_insn (gen_rtx_SET (r10, tmp));
8525
8526 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
8527 }
8528 }
8529
8530 /* Generate frame load insns and restore notes. */
8531 for (i = 0; i < ncregs; ++i)
8532 {
8533 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8534 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
8535 rtx reg, frame_load;
8536
8537 reg = gen_rtx_REG (mode, r.regno);
8538 frame_load = gen_frame_load (reg, rsi, r.offset);
8539
8540 /* Save RSI frame load insn & note to add last. */
8541 if (r.regno == SI_REG)
8542 {
8543 gcc_assert (!rsi_frame_load);
8544 rsi_frame_load = frame_load;
8545 rsi_restore_offset = r.offset;
8546 }
8547 else
8548 {
8549 RTVEC_ELT (v, vi++) = frame_load;
8550 ix86_add_cfa_restore_note (NULL, reg, r.offset);
8551 }
8552 }
8553
8554 /* Add RSI frame load & restore note at the end. */
8555 gcc_assert (rsi_frame_load);
8556 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
8557 RTVEC_ELT (v, vi++) = rsi_frame_load;
8558 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
8559 rsi_restore_offset);
8560
8561 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
8562 if (!use_call && !frame_pointer_needed)
8563 {
8564 gcc_assert (m->fs.sp_valid);
8565 gcc_assert (!m->fs.sp_realigned);
8566
8567 /* At this point, R10 should point to frame.stack_realign_offset. */
8568 if (m->fs.cfa_reg == stack_pointer_rtx)
8569 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
8570 m->fs.sp_offset = frame.stack_realign_offset;
8571 }
8572
8573 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
8574 tmp = gen_rtx_PARALLEL (VOIDmode, v);
8575 if (use_call)
8576 insn = emit_insn (tmp);
8577 else
8578 {
8579 insn = emit_jump_insn (tmp);
8580 JUMP_LABEL (insn) = ret_rtx;
8581
8582 if (frame_pointer_needed)
8583 ix86_emit_leave (insn);
8584 else
8585 {
8586 /* Need CFA adjust note. */
8587 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
8588 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
8589 }
8590 }
8591
8592 RTX_FRAME_RELATED_P (insn) = true;
8593 ix86_add_queued_cfa_restore_notes (insn);
8594
8595 /* If we're not doing a tail-call, we need to adjust the stack. */
8596 if (use_call && m->fs.sp_valid)
8597 {
8598 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
8599 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8600 GEN_INT (dealloc), style,
8601 m->fs.cfa_reg == stack_pointer_rtx);
8602 }
8603 }
8604
8605 /* Restore function stack, frame, and registers. */
8606
8607 void
8608 ix86_expand_epilogue (int style)
8609 {
8610 struct machine_function *m = cfun->machine;
8611 struct machine_frame_state frame_state_save = m->fs;
8612 bool restore_regs_via_mov;
8613 bool using_drap;
8614 bool restore_stub_is_tail = false;
8615
8616 if (ix86_function_naked (current_function_decl))
8617 {
8618 /* The program should not reach this point. */
8619 emit_insn (gen_ud2 ());
8620 return;
8621 }
8622
8623 ix86_finalize_stack_frame_flags ();
8624 const struct ix86_frame &frame = cfun->machine->frame;
8625
8626 m->fs.sp_realigned = stack_realign_fp;
8627 m->fs.sp_valid = stack_realign_fp
8628 || !frame_pointer_needed
8629 || crtl->sp_is_unchanging;
8630 gcc_assert (!m->fs.sp_valid
8631 || m->fs.sp_offset == frame.stack_pointer_offset);
8632
8633 /* The FP must be valid if the frame pointer is present. */
8634 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
8635 gcc_assert (!m->fs.fp_valid
8636 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
8637
8638 /* We must have *some* valid pointer to the stack frame. */
8639 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
8640
8641 /* The DRAP is never valid at this point. */
8642 gcc_assert (!m->fs.drap_valid);
8643
8644 /* See the comment about red zone and frame
8645 pointer usage in ix86_expand_prologue. */
8646 if (frame_pointer_needed && frame.red_zone_size)
8647 emit_insn (gen_memory_blockage ());
8648
8649 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8650 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
8651
8652 /* Determine the CFA offset of the end of the red-zone. */
8653 m->fs.red_zone_offset = 0;
8654 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
8655 {
8656 /* The red-zone begins below the return address and the error code in
8657 an exception handler. */
8658 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
8659
8660 /* When the register save area is in the aligned portion of
8661 the stack, determine the maximum runtime displacement that
8662 matches up with the aligned frame. */
8663 if (stack_realign_drap)
8664 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
8665 + UNITS_PER_WORD);
8666 }
8667
8668 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
8669
8670 /* Special care must be taken for the normal return case of a function
8671 using eh_return: the eax and edx registers are marked as saved, but
8672 not restored along this path. Adjust the save location to match. */
8673 if (crtl->calls_eh_return && style != 2)
8674 reg_save_offset -= 2 * UNITS_PER_WORD;
8675
8676 /* EH_RETURN requires the use of moves to function properly. */
8677 if (crtl->calls_eh_return)
8678 restore_regs_via_mov = true;
8679 /* SEH requires the use of pops to identify the epilogue. */
8680 else if (TARGET_SEH)
8681 restore_regs_via_mov = false;
8682 /* If we're only restoring one register and sp cannot be used, then
8683 use a move instruction to restore the register, since it's
8684 less work than reloading sp and popping the register. */
8685 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
8686 restore_regs_via_mov = true;
8687 else if (TARGET_EPILOGUE_USING_MOVE
8688 && cfun->machine->use_fast_prologue_epilogue
8689 && (frame.nregs > 1
8690 || m->fs.sp_offset != reg_save_offset))
8691 restore_regs_via_mov = true;
8692 else if (frame_pointer_needed
8693 && !frame.nregs
8694 && m->fs.sp_offset != reg_save_offset)
8695 restore_regs_via_mov = true;
8696 else if (frame_pointer_needed
8697 && TARGET_USE_LEAVE
8698 && cfun->machine->use_fast_prologue_epilogue
8699 && frame.nregs == 1)
8700 restore_regs_via_mov = true;
8701 else
8702 restore_regs_via_mov = false;
8703
8704 if (restore_regs_via_mov || frame.nsseregs)
8705 {
8706 /* Ensure that the entire register save area is addressable via
8707 the stack pointer, if we will restore SSE regs via sp. */
8708 if (TARGET_64BIT
8709 && m->fs.sp_offset > 0x7fffffff
8710 && sp_valid_at (frame.stack_realign_offset + 1)
8711 && (frame.nsseregs + frame.nregs) != 0)
8712 {
8713 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8714 GEN_INT (m->fs.sp_offset
8715 - frame.sse_reg_save_offset),
8716 style,
8717 m->fs.cfa_reg == stack_pointer_rtx);
8718 }
8719 }
8720
8721 /* If there are any SSE registers to restore, then we have to do it
8722 via moves, since there's obviously no pop for SSE regs. */
8723 if (frame.nsseregs)
8724 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
8725 style == 2);
8726
8727 if (m->call_ms2sysv)
8728 {
8729 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
8730
8731 /* We cannot use a tail-call for the stub if:
8732 1. We have to pop incoming args,
8733 2. We have additional int regs to restore, or
8734 3. A sibling call will be the tail-call, or
8735 4. We are emitting an eh_return_internal epilogue.
8736
8737 TODO: Item 4 has not yet been tested!
8738
8739 If any of the above are true, we will call the stub rather than
8740 jump to it. */
8741 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
8742 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
8743 }
8744
8745 /* If using an out-of-line stub that is a tail-call, then... */
8746 if (m->call_ms2sysv && restore_stub_is_tail)
8747 {
8748 /* TODO: paranoid tests. (remove eventually) */
8749 gcc_assert (m->fs.sp_valid);
8750 gcc_assert (!m->fs.sp_realigned);
8751 gcc_assert (!m->fs.fp_valid);
8752 gcc_assert (!m->fs.realigned);
8753 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
8754 gcc_assert (!crtl->drap_reg);
8755 gcc_assert (!frame.nregs);
8756 }
8757 else if (restore_regs_via_mov)
8758 {
8759 rtx t;
8760
8761 if (frame.nregs)
8762 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
8763
8764 /* eh_return epilogues need %ecx added to the stack pointer. */
8765 if (style == 2)
8766 {
8767 rtx sa = EH_RETURN_STACKADJ_RTX;
8768 rtx_insn *insn;
8769
8770 /* %ecx can't be used for both DRAP register and eh_return. */
8771 if (crtl->drap_reg)
8772 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
8773
8774 /* regparm nested functions don't work with eh_return. */
8775 gcc_assert (!ix86_static_chain_on_stack);
8776
8777 if (frame_pointer_needed)
8778 {
8779 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8780 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
8781 emit_insn (gen_rtx_SET (sa, t));
8782
8783 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
8784 insn = emit_move_insn (hard_frame_pointer_rtx, t);
8785
8786 /* Note that we use SA as a temporary CFA, as the return
8787 address is at the proper place relative to it. We
8788 pretend this happens at the FP restore insn because
8789 prior to this insn the FP would be stored at the wrong
8790 offset relative to SA, and after this insn we have no
8791 other reasonable register to use for the CFA. We don't
8792 bother resetting the CFA to the SP for the duration of
8793 the return insn, unless the control flow instrumentation
8794 is done. In this case the SP is used later and we have
8795 to reset CFA to SP. */
8796 add_reg_note (insn, REG_CFA_DEF_CFA,
8797 plus_constant (Pmode, sa, UNITS_PER_WORD));
8798 ix86_add_queued_cfa_restore_notes (insn);
8799 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8800 RTX_FRAME_RELATED_P (insn) = 1;
8801
8802 m->fs.cfa_reg = sa;
8803 m->fs.cfa_offset = UNITS_PER_WORD;
8804 m->fs.fp_valid = false;
8805
8806 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8807 const0_rtx, style,
8808 flag_cf_protection);
8809 }
8810 else
8811 {
8812 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8813 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
8814 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
8815 ix86_add_queued_cfa_restore_notes (insn);
8816
8817 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8818 if (m->fs.cfa_offset != UNITS_PER_WORD)
8819 {
8820 m->fs.cfa_offset = UNITS_PER_WORD;
8821 add_reg_note (insn, REG_CFA_DEF_CFA,
8822 plus_constant (Pmode, stack_pointer_rtx,
8823 UNITS_PER_WORD));
8824 RTX_FRAME_RELATED_P (insn) = 1;
8825 }
8826 }
8827 m->fs.sp_offset = UNITS_PER_WORD;
8828 m->fs.sp_valid = true;
8829 m->fs.sp_realigned = false;
8830 }
8831 }
8832 else
8833 {
8834 /* SEH requires that the function end with (1) a stack adjustment
8835 if necessary, (2) a sequence of pops, and (3) a return or
8836 jump instruction. Prevent insns from the function body from
8837 being scheduled into this sequence. */
8838 if (TARGET_SEH)
8839 {
8840 /* Prevent a catch region from being adjacent to the standard
8841 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
8842 nor several other flags that would be interesting to test are
8843 set up yet. */
8844 if (flag_non_call_exceptions)
8845 emit_insn (gen_nops (const1_rtx));
8846 else
8847 emit_insn (gen_blockage ());
8848 }
8849
8850 /* First step is to deallocate the stack frame so that we can
8851 pop the registers. If the stack pointer was realigned, it needs
8852 to be restored now. Also do it on SEH target for very large
8853 frame as the emitted instructions aren't allowed by the ABI
8854 in epilogues. */
8855 if (!m->fs.sp_valid || m->fs.sp_realigned
8856 || (TARGET_SEH
8857 && (m->fs.sp_offset - reg_save_offset
8858 >= SEH_MAX_FRAME_SIZE)))
8859 {
8860 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
8861 GEN_INT (m->fs.fp_offset
8862 - reg_save_offset),
8863 style, false);
8864 }
8865 else if (m->fs.sp_offset != reg_save_offset)
8866 {
8867 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8868 GEN_INT (m->fs.sp_offset
8869 - reg_save_offset),
8870 style,
8871 m->fs.cfa_reg == stack_pointer_rtx);
8872 }
8873
8874 ix86_emit_restore_regs_using_pop ();
8875 }
8876
8877 /* If we used a frame pointer and haven't already got rid of it,
8878 then do so now. */
8879 if (m->fs.fp_valid)
8880 {
8881 /* If the stack pointer is valid and pointing at the frame
8882 pointer store address, then we only need a pop. */
8883 if (sp_valid_at (frame.hfp_save_offset)
8884 && m->fs.sp_offset == frame.hfp_save_offset)
8885 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
8886 /* Leave results in shorter dependency chains on CPUs that are
8887 able to grok it fast. */
8888 else if (TARGET_USE_LEAVE
8889 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
8890 || !cfun->machine->use_fast_prologue_epilogue)
8891 ix86_emit_leave (NULL);
8892 else
8893 {
8894 pro_epilogue_adjust_stack (stack_pointer_rtx,
8895 hard_frame_pointer_rtx,
8896 const0_rtx, style, !using_drap);
8897 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
8898 }
8899 }
8900
8901 if (using_drap)
8902 {
8903 int param_ptr_offset = UNITS_PER_WORD;
8904 rtx_insn *insn;
8905
8906 gcc_assert (stack_realign_drap);
8907
8908 if (ix86_static_chain_on_stack)
8909 param_ptr_offset += UNITS_PER_WORD;
8910 if (!call_used_regs[REGNO (crtl->drap_reg)])
8911 param_ptr_offset += UNITS_PER_WORD;
8912
8913 insn = emit_insn (gen_rtx_SET
8914 (stack_pointer_rtx,
8915 gen_rtx_PLUS (Pmode,
8916 crtl->drap_reg,
8917 GEN_INT (-param_ptr_offset))));
8918 m->fs.cfa_reg = stack_pointer_rtx;
8919 m->fs.cfa_offset = param_ptr_offset;
8920 m->fs.sp_offset = param_ptr_offset;
8921 m->fs.realigned = false;
8922
8923 add_reg_note (insn, REG_CFA_DEF_CFA,
8924 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8925 GEN_INT (param_ptr_offset)));
8926 RTX_FRAME_RELATED_P (insn) = 1;
8927
8928 if (!call_used_regs[REGNO (crtl->drap_reg)])
8929 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
8930 }
8931
8932 /* At this point the stack pointer must be valid, and we must have
8933 restored all of the registers. We may not have deallocated the
8934 entire stack frame. We've delayed this until now because it may
8935 be possible to merge the local stack deallocation with the
8936 deallocation forced by ix86_static_chain_on_stack. */
8937 gcc_assert (m->fs.sp_valid);
8938 gcc_assert (!m->fs.sp_realigned);
8939 gcc_assert (!m->fs.fp_valid);
8940 gcc_assert (!m->fs.realigned);
8941 if (m->fs.sp_offset != UNITS_PER_WORD)
8942 {
8943 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8944 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
8945 style, true);
8946 }
8947 else
8948 ix86_add_queued_cfa_restore_notes (get_last_insn ());
8949
8950 /* Sibcall epilogues don't want a return instruction. */
8951 if (style == 0)
8952 {
8953 m->fs = frame_state_save;
8954 return;
8955 }
8956
8957 if (cfun->machine->func_type != TYPE_NORMAL)
8958 emit_jump_insn (gen_interrupt_return ());
8959 else if (crtl->args.pops_args && crtl->args.size)
8960 {
8961 rtx popc = GEN_INT (crtl->args.pops_args);
8962
8963 /* i386 can only pop 64K bytes. If asked to pop more, pop return
8964 address, do explicit add, and jump indirectly to the caller. */
8965
8966 if (crtl->args.pops_args >= 65536)
8967 {
8968 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8969 rtx_insn *insn;
8970
8971 /* There is no "pascal" calling convention in any 64bit ABI. */
8972 gcc_assert (!TARGET_64BIT);
8973
8974 insn = emit_insn (gen_pop (ecx));
8975 m->fs.cfa_offset -= UNITS_PER_WORD;
8976 m->fs.sp_offset -= UNITS_PER_WORD;
8977
8978 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8979 x = gen_rtx_SET (stack_pointer_rtx, x);
8980 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8981 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
8982 RTX_FRAME_RELATED_P (insn) = 1;
8983
8984 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8985 popc, -1, true);
8986 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
8987 }
8988 else
8989 emit_jump_insn (gen_simple_return_pop_internal (popc));
8990 }
8991 else if (!m->call_ms2sysv || !restore_stub_is_tail)
8992 {
8993 /* In case of return from EH a simple return cannot be used
8994 as a return address will be compared with a shadow stack
8995 return address. Use indirect jump instead. */
8996 if (style == 2 && flag_cf_protection)
8997 {
8998 /* Register used in indirect jump must be in word_mode. But
8999 Pmode may not be the same as word_mode for x32. */
9000 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9001 rtx_insn *insn;
9002
9003 insn = emit_insn (gen_pop (ecx));
9004 m->fs.cfa_offset -= UNITS_PER_WORD;
9005 m->fs.sp_offset -= UNITS_PER_WORD;
9006
9007 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9008 x = gen_rtx_SET (stack_pointer_rtx, x);
9009 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9010 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9011 RTX_FRAME_RELATED_P (insn) = 1;
9012
9013 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9014 }
9015 else
9016 emit_jump_insn (gen_simple_return_internal ());
9017 }
9018
9019 /* Restore the state back to the state from the prologue,
9020 so that it's correct for the next epilogue. */
9021 m->fs = frame_state_save;
9022 }
9023
9024 /* Reset from the function's potential modifications. */
9025
9026 static void
9027 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9028 {
9029 if (pic_offset_table_rtx
9030 && !ix86_use_pseudo_pic_reg ())
9031 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9032
9033 if (TARGET_MACHO)
9034 {
9035 rtx_insn *insn = get_last_insn ();
9036 rtx_insn *deleted_debug_label = NULL;
9037
9038 /* Mach-O doesn't support labels at the end of objects, so if
9039 it looks like we might want one, take special action.
9040 First, collect any sequence of deleted debug labels. */
9041 while (insn
9042 && NOTE_P (insn)
9043 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9044 {
9045 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9046 notes; instead set their CODE_LABEL_NUMBER to -1,
9047 otherwise there would be code generation differences
9048 between -g and -g0. */
9049 if (NOTE_P (insn) && NOTE_KIND (insn)
9050 == NOTE_INSN_DELETED_DEBUG_LABEL)
9051 deleted_debug_label = insn;
9052 insn = PREV_INSN (insn);
9053 }
9054
9055 /* If we have:
9056 label:
9057 barrier
9058 then this needs to be detected, so skip past the barrier. */
9059
9060 if (insn && BARRIER_P (insn))
9061 insn = PREV_INSN (insn);
9062
9063 /* Up to now we've only seen notes or barriers. */
9064 if (insn)
9065 {
9066 if (LABEL_P (insn)
9067 || (NOTE_P (insn)
9068 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9069 /* Trailing label. */
9070 fputs ("\tnop\n", file);
9071 else if (cfun && ! cfun->is_thunk)
9072 {
9073 /* See if we have a completely empty function body, skipping
9074 the special case of the picbase thunk emitted as asm. */
9075 while (insn && ! INSN_P (insn))
9076 insn = PREV_INSN (insn);
9077 /* If we don't find any insns, we've got an empty function body;
9078 i.e. completely empty, without a return or branch. This is
9079 taken as the case where a function body has been removed
9080 because it contains an inline __builtin_unreachable(). GCC
9081 declares that reaching __builtin_unreachable() means UB so
9082 we're not obliged to do anything special; however, we want
9083 non-zero-sized function bodies. To meet this, and help the
9084 user out, let's trap the case. */
9085 if (insn == NULL)
9086 fputs ("\tud2\n", file);
9087 }
9088 }
9089 else if (deleted_debug_label)
9090 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9091 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9092 CODE_LABEL_NUMBER (insn) = -1;
9093 }
9094 }
9095
9096 /* Return a scratch register to use in the split stack prologue. The
9097 split stack prologue is used for -fsplit-stack. It consists of the
9098 first instructions in the function, even before the regular prologue.
9099 The scratch register can be any caller-saved register which is not
9100 used for parameters or for the static chain. */
9101
9102 static unsigned int
9103 split_stack_prologue_scratch_regno (void)
9104 {
9105 if (TARGET_64BIT)
9106 return R11_REG;
9107 else
9108 {
9109 bool is_fastcall, is_thiscall;
9110 int regparm;
9111
9112 is_fastcall = (lookup_attribute ("fastcall",
9113 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9114 != NULL);
9115 is_thiscall = (lookup_attribute ("thiscall",
9116 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9117 != NULL);
9118 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9119
9120 if (is_fastcall)
9121 {
9122 if (DECL_STATIC_CHAIN (cfun->decl))
9123 {
9124 sorry ("%<-fsplit-stack%> does not support fastcall with "
9125 "nested function");
9126 return INVALID_REGNUM;
9127 }
9128 return AX_REG;
9129 }
9130 else if (is_thiscall)
9131 {
9132 if (!DECL_STATIC_CHAIN (cfun->decl))
9133 return DX_REG;
9134 return AX_REG;
9135 }
9136 else if (regparm < 3)
9137 {
9138 if (!DECL_STATIC_CHAIN (cfun->decl))
9139 return CX_REG;
9140 else
9141 {
9142 if (regparm >= 2)
9143 {
9144 sorry ("%<-fsplit-stack%> does not support 2 register "
9145 "parameters for a nested function");
9146 return INVALID_REGNUM;
9147 }
9148 return DX_REG;
9149 }
9150 }
9151 else
9152 {
9153 /* FIXME: We could make this work by pushing a register
9154 around the addition and comparison. */
9155 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9156 return INVALID_REGNUM;
9157 }
9158 }
9159 }
9160
9161 /* A SYMBOL_REF for the function which allocates new stack space for
9162 -fsplit-stack. */
9163
9164 static GTY(()) rtx split_stack_fn;
9165
9166 /* A SYMBOL_REF for the variant of __morestack used with the large
9167 model. */
9168
9169 static GTY(()) rtx split_stack_fn_large;
9170
9171 /* Return location of the stack guard value in the TLS block. */
9172
9173 rtx
9174 ix86_split_stack_guard (void)
9175 {
9176 int offset;
9177 addr_space_t as = DEFAULT_TLS_SEG_REG;
9178 rtx r;
9179
9180 gcc_assert (flag_split_stack);
9181
9182 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9183 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9184 #else
9185 gcc_unreachable ();
9186 #endif
9187
9188 r = GEN_INT (offset);
9189 r = gen_const_mem (Pmode, r);
9190 set_mem_addr_space (r, as);
9191
9192 return r;
9193 }
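/* For illustration: the returned MEM is a TLS-segment reference, so the
   prologue comparison below ends up as something like
     cmp %fs:<split-stack offset>, %rsp
   on 64-bit glibc targets (%gs on 32-bit); the exact offset is
   libc-specific and comes from TARGET_THREAD_SPLIT_STACK_OFFSET.  */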
9194
9195 /* Handle -fsplit-stack. These are the first instructions in the
9196 function, even before the regular prologue. */
9197
9198 void
9199 ix86_expand_split_stack_prologue (void)
9200 {
9201 HOST_WIDE_INT allocate;
9202 unsigned HOST_WIDE_INT args_size;
9203 rtx_code_label *label;
9204 rtx limit, current, allocate_rtx, call_fusage;
9205 rtx_insn *call_insn;
9206 rtx scratch_reg = NULL_RTX;
9207 rtx_code_label *varargs_label = NULL;
9208 rtx fn;
9209
9210 gcc_assert (flag_split_stack && reload_completed);
9211
9212 ix86_finalize_stack_frame_flags ();
9213 struct ix86_frame &frame = cfun->machine->frame;
9214 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9215
9216 /* This is the label we will branch to if we have enough stack
9217 space. We expect the basic block reordering pass to reverse this
9218 branch if optimizing, so that we branch in the unlikely case. */
9219 label = gen_label_rtx ();
9220
9221 /* We need to compare the stack pointer minus the frame size with
9222 the stack boundary in the TCB. The stack boundary always gives
9223 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9224 can compare directly. Otherwise we need to do an addition. */
9225
9226 limit = ix86_split_stack_guard ();
9227
9228 if (allocate < SPLIT_STACK_AVAILABLE)
9229 current = stack_pointer_rtx;
9230 else
9231 {
9232 unsigned int scratch_regno;
9233 rtx offset;
9234
9235 /* We need a scratch register to hold the stack pointer minus
9236 the required frame size. Since this is the very start of the
9237 function, the scratch register can be any caller-saved
9238 register which is not used for parameters. */
9239 offset = GEN_INT (- allocate);
9240 scratch_regno = split_stack_prologue_scratch_regno ();
9241 if (scratch_regno == INVALID_REGNUM)
9242 return;
9243 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9244 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9245 {
9246 /* We don't use gen_add in this case because it will
9247 want to split to lea, but when not optimizing the insn
9248 will not be split after this point. */
9249 emit_insn (gen_rtx_SET (scratch_reg,
9250 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9251 offset)));
9252 }
9253 else
9254 {
9255 emit_move_insn (scratch_reg, offset);
9256 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
9257 }
9258 current = scratch_reg;
9259 }
9260
9261 ix86_expand_branch (GEU, current, limit, label);
9262 rtx_insn *jump_insn = get_last_insn ();
9263 JUMP_LABEL (jump_insn) = label;
9264
9265 /* Mark the jump as very likely to be taken. */
9266 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
9267
9268 if (split_stack_fn == NULL_RTX)
9269 {
9270 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9271 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9272 }
9273 fn = split_stack_fn;
9274
9275 /* Get more stack space. We pass in the desired stack space and the
9276 size of the arguments to copy to the new stack. In 32-bit mode
9277 we push the parameters; __morestack will return on a new stack
9278 anyhow. In 64-bit mode we pass the parameters in r10 and
9279 r11. */
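/* Illustrative 64-bit call sequence (a sketch only; the actual RTL is
   built below, and FRAME_SIZE/ARGS_SIZE stand for the computed values):
     movq $FRAME_SIZE, %r10   ; bytes of new stack to allocate
     movq $ARGS_SIZE,  %r11   ; bytes of incoming arguments to copy
     call __morestack
   In 32-bit mode the two values are pushed on the stack instead.  */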
9280 allocate_rtx = GEN_INT (allocate);
9281 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9282 call_fusage = NULL_RTX;
9283 rtx pop = NULL_RTX;
9284 if (TARGET_64BIT)
9285 {
9286 rtx reg10, reg11;
9287
9288 reg10 = gen_rtx_REG (Pmode, R10_REG);
9289 reg11 = gen_rtx_REG (Pmode, R11_REG);
9290
9291 /* If this function uses a static chain, it will be in %r10.
9292 Preserve it across the call to __morestack. */
9293 if (DECL_STATIC_CHAIN (cfun->decl))
9294 {
9295 rtx rax;
9296
9297 rax = gen_rtx_REG (word_mode, AX_REG);
9298 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
9299 use_reg (&call_fusage, rax);
9300 }
9301
9302 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
9303 && !TARGET_PECOFF)
9304 {
9305 HOST_WIDE_INT argval;
9306
9307 gcc_assert (Pmode == DImode);
9308 /* When using the large model we need to load the address
9309 into a register, and we've run out of registers. So we
9310 switch to a different calling convention, and we call a
9311 different function: __morestack_large. We pass the
9312 argument size in the upper 32 bits of r10 and pass the
9313 frame size in the lower 32 bits. */
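/* Worked example with illustrative values: for args_size == 0x20 and a
   frame size of 0x1000 the packed value is
     argval = (0x20 << 32) + 0x1000 == 0x0000002000001000,
   so %r10 carries both quantities in one register.  */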
9314 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
9315 gcc_assert ((args_size & 0xffffffff) == args_size);
9316
9317 if (split_stack_fn_large == NULL_RTX)
9318 {
9319 split_stack_fn_large
9320 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
9321 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
9322 }
9323 if (ix86_cmodel == CM_LARGE_PIC)
9324 {
9325 rtx_code_label *label;
9326 rtx x;
9327
9328 label = gen_label_rtx ();
9329 emit_label (label);
9330 LABEL_PRESERVE_P (label) = 1;
9331 emit_insn (gen_set_rip_rex64 (reg10, label));
9332 emit_insn (gen_set_got_offset_rex64 (reg11, label));
9333 emit_insn (gen_add2_insn (reg10, reg11));
9334 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
9335 UNSPEC_GOT);
9336 x = gen_rtx_CONST (Pmode, x);
9337 emit_move_insn (reg11, x);
9338 x = gen_rtx_PLUS (Pmode, reg10, reg11);
9339 x = gen_const_mem (Pmode, x);
9340 emit_move_insn (reg11, x);
9341 }
9342 else
9343 emit_move_insn (reg11, split_stack_fn_large);
9344
9345 fn = reg11;
9346
9347 argval = ((args_size << 16) << 16) + allocate;
9348 emit_move_insn (reg10, GEN_INT (argval));
9349 }
9350 else
9351 {
9352 emit_move_insn (reg10, allocate_rtx);
9353 emit_move_insn (reg11, GEN_INT (args_size));
9354 use_reg (&call_fusage, reg11);
9355 }
9356
9357 use_reg (&call_fusage, reg10);
9358 }
9359 else
9360 {
9361 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
9362 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
9363 insn = emit_insn (gen_push (allocate_rtx));
9364 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
9365 pop = GEN_INT (2 * UNITS_PER_WORD);
9366 }
9367 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
9368 GEN_INT (UNITS_PER_WORD), constm1_rtx,
9369 pop, false);
9370 add_function_usage_to (call_insn, call_fusage);
9371 if (!TARGET_64BIT)
9372 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
9373 /* Indicate that this function can't jump to non-local gotos. */
9374 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
9375
9376 /* In order to make call/return prediction work right, we now need
9377 to execute a return instruction. See
9378 libgcc/config/i386/morestack.S for the details on how this works.
9379
9380 For flow purposes gcc must not see this as a return
9381 instruction--we need control flow to continue at the subsequent
9382 label. Therefore, we use an unspec. */
9383 gcc_assert (crtl->args.pops_args < 65536);
9384 rtx_insn *ret_insn
9385 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
9386
9387 if ((flag_cf_protection & CF_BRANCH))
9388 {
9389 /* Insert ENDBR since __morestack will jump back here via indirect
9390 call. */
9391 rtx cet_eb = gen_nop_endbr ();
9392 emit_insn_after (cet_eb, ret_insn);
9393 }
9394
9395 /* If we are in 64-bit mode and this function uses a static chain,
9396 we saved %r10 in %rax before calling __morestack. */
9397 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
9398 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9399 gen_rtx_REG (word_mode, AX_REG));
9400
9401 /* If this function calls va_start, we need to store a pointer to
9402 the arguments on the old stack, because they may not have been
9403 all copied to the new stack. At this point the old stack can be
9404 found at the frame pointer value used by __morestack, because
9405 __morestack has set that up before calling back to us. Here we
9406 store that pointer in a scratch register, and in
9407 ix86_expand_prologue we store the scratch register in a stack
9408 slot. */
9409 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9410 {
9411 unsigned int scratch_regno;
9412 rtx frame_reg;
9413 int words;
9414
9415 scratch_regno = split_stack_prologue_scratch_regno ();
9416 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9417 frame_reg = gen_rtx_REG (Pmode, BP_REG);
9418
9419 /* 64-bit:
9420 fp -> old fp value
9421 return address within this function
9422 return address of caller of this function
9423 stack arguments
9424 So we add three words to get to the stack arguments.
9425
9426 32-bit:
9427 fp -> old fp value
9428 return address within this function
9429 first argument to __morestack
9430 second argument to __morestack
9431 return address of caller of this function
9432 stack arguments
9433 So we add five words to get to the stack arguments.
9434 */
9435 words = TARGET_64BIT ? 3 : 5;
9436 emit_insn (gen_rtx_SET (scratch_reg,
9437 gen_rtx_PLUS (Pmode, frame_reg,
9438 GEN_INT (words * UNITS_PER_WORD))));
9439
9440 varargs_label = gen_label_rtx ();
9441 emit_jump_insn (gen_jump (varargs_label));
9442 JUMP_LABEL (get_last_insn ()) = varargs_label;
9443
9444 emit_barrier ();
9445 }
9446
9447 emit_label (label);
9448 LABEL_NUSES (label) = 1;
9449
9450 /* If this function calls va_start, we now have to set the scratch
9451 register for the case where we do not call __morestack. In this
9452 case we need to set it based on the stack pointer. */
9453 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9454 {
9455 emit_insn (gen_rtx_SET (scratch_reg,
9456 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9457 GEN_INT (UNITS_PER_WORD))));
9458
9459 emit_label (varargs_label);
9460 LABEL_NUSES (varargs_label) = 1;
9461 }
9462 }
9463
9464 /* We may have to tell the dataflow pass that the split stack prologue
9465 is initializing a scratch register. */
9466
9467 static void
9468 ix86_live_on_entry (bitmap regs)
9469 {
9470 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9471 {
9472 gcc_assert (flag_split_stack);
9473 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
9474 }
9475 }
9476 \f
9477 /* Extract the parts of an RTL expression that is a valid memory address
9478 for an instruction. Return 0 if the structure of the address is
9479 grossly off. Return -1 if the address contains ASHIFT, so it is not
9480 strictly valid, but is still used for computing the length of the lea instruction. */
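/* For example (illustrative), the canonical address
     (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4))
                       (reg:SI ax))
              (const_int 16))
   decomposes into base == ax, index == bx, scale == 4 and disp == 16,
   i.e. the operand 16(%eax,%ebx,4).  */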
9481
9482 int
9483 ix86_decompose_address (rtx addr, struct ix86_address *out)
9484 {
9485 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9486 rtx base_reg, index_reg;
9487 HOST_WIDE_INT scale = 1;
9488 rtx scale_rtx = NULL_RTX;
9489 rtx tmp;
9490 int retval = 1;
9491 addr_space_t seg = ADDR_SPACE_GENERIC;
9492
9493 /* Allow zero-extended SImode addresses;
9494 they will be emitted with the addr32 prefix. */
9495 if (TARGET_64BIT && GET_MODE (addr) == DImode)
9496 {
9497 if (GET_CODE (addr) == ZERO_EXTEND
9498 && GET_MODE (XEXP (addr, 0)) == SImode)
9499 {
9500 addr = XEXP (addr, 0);
9501 if (CONST_INT_P (addr))
9502 return 0;
9503 }
9504 else if (GET_CODE (addr) == AND
9505 && const_32bit_mask (XEXP (addr, 1), DImode))
9506 {
9507 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
9508 if (addr == NULL_RTX)
9509 return 0;
9510
9511 if (CONST_INT_P (addr))
9512 return 0;
9513 }
9514 }
9515
9516 /* Allow SImode subregs of DImode addresses;
9517 they will be emitted with the addr32 prefix. */
9518 if (TARGET_64BIT && GET_MODE (addr) == SImode)
9519 {
9520 if (SUBREG_P (addr)
9521 && GET_MODE (SUBREG_REG (addr)) == DImode)
9522 {
9523 addr = SUBREG_REG (addr);
9524 if (CONST_INT_P (addr))
9525 return 0;
9526 }
9527 }
9528
9529 if (REG_P (addr))
9530 base = addr;
9531 else if (SUBREG_P (addr))
9532 {
9533 if (REG_P (SUBREG_REG (addr)))
9534 base = addr;
9535 else
9536 return 0;
9537 }
9538 else if (GET_CODE (addr) == PLUS)
9539 {
9540 rtx addends[4], op;
9541 int n = 0, i;
9542
9543 op = addr;
9544 do
9545 {
9546 if (n >= 4)
9547 return 0;
9548 addends[n++] = XEXP (op, 1);
9549 op = XEXP (op, 0);
9550 }
9551 while (GET_CODE (op) == PLUS);
9552 if (n >= 4)
9553 return 0;
9554 addends[n] = op;
9555
9556 for (i = n; i >= 0; --i)
9557 {
9558 op = addends[i];
9559 switch (GET_CODE (op))
9560 {
9561 case MULT:
9562 if (index)
9563 return 0;
9564 index = XEXP (op, 0);
9565 scale_rtx = XEXP (op, 1);
9566 break;
9567
9568 case ASHIFT:
9569 if (index)
9570 return 0;
9571 index = XEXP (op, 0);
9572 tmp = XEXP (op, 1);
9573 if (!CONST_INT_P (tmp))
9574 return 0;
9575 scale = INTVAL (tmp);
9576 if ((unsigned HOST_WIDE_INT) scale > 3)
9577 return 0;
9578 scale = 1 << scale;
9579 break;
9580
9581 case ZERO_EXTEND:
9582 op = XEXP (op, 0);
9583 if (GET_CODE (op) != UNSPEC)
9584 return 0;
9585 /* FALLTHRU */
9586
9587 case UNSPEC:
9588 if (XINT (op, 1) == UNSPEC_TP
9589 && TARGET_TLS_DIRECT_SEG_REFS
9590 && seg == ADDR_SPACE_GENERIC)
9591 seg = DEFAULT_TLS_SEG_REG;
9592 else
9593 return 0;
9594 break;
9595
9596 case SUBREG:
9597 if (!REG_P (SUBREG_REG (op)))
9598 return 0;
9599 /* FALLTHRU */
9600
9601 case REG:
9602 if (!base)
9603 base = op;
9604 else if (!index)
9605 index = op;
9606 else
9607 return 0;
9608 break;
9609
9610 case CONST:
9611 case CONST_INT:
9612 case SYMBOL_REF:
9613 case LABEL_REF:
9614 if (disp)
9615 return 0;
9616 disp = op;
9617 break;
9618
9619 default:
9620 return 0;
9621 }
9622 }
9623 }
9624 else if (GET_CODE (addr) == MULT)
9625 {
9626 index = XEXP (addr, 0); /* index*scale */
9627 scale_rtx = XEXP (addr, 1);
9628 }
9629 else if (GET_CODE (addr) == ASHIFT)
9630 {
9631 /* We're called for lea too, which implements ashift on occasion. */
9632 index = XEXP (addr, 0);
9633 tmp = XEXP (addr, 1);
9634 if (!CONST_INT_P (tmp))
9635 return 0;
9636 scale = INTVAL (tmp);
9637 if ((unsigned HOST_WIDE_INT) scale > 3)
9638 return 0;
9639 scale = 1 << scale;
9640 retval = -1;
9641 }
9642 else
9643 disp = addr; /* displacement */
9644
9645 if (index)
9646 {
9647 if (REG_P (index))
9648 ;
9649 else if (SUBREG_P (index)
9650 && REG_P (SUBREG_REG (index)))
9651 ;
9652 else
9653 return 0;
9654 }
9655
9656 /* Extract the integral value of scale. */
9657 if (scale_rtx)
9658 {
9659 if (!CONST_INT_P (scale_rtx))
9660 return 0;
9661 scale = INTVAL (scale_rtx);
9662 }
9663
9664 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
9665 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
9666
9667 /* Avoid useless 0 displacement. */
9668 if (disp == const0_rtx && (base || index))
9669 disp = NULL_RTX;
9670
9671 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
9672 if (base_reg && index_reg && scale == 1
9673 && (REGNO (index_reg) == ARG_POINTER_REGNUM
9674 || REGNO (index_reg) == FRAME_POINTER_REGNUM
9675 || REGNO (index_reg) == SP_REG))
9676 {
9677 std::swap (base, index);
9678 std::swap (base_reg, index_reg);
9679 }
9680
9681 /* Special case: %ebp cannot be encoded as a base without a displacement.
9682 Similarly %r13. */
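/* E.g. (%ebp) has no ModR/M encoding of its own and must be emitted as
   0(%ebp); adding the const0_rtx displacement below achieves exactly
   that, and the same applies to (%r13) in 64-bit code.  */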
9683 if (!disp && base_reg
9684 && (REGNO (base_reg) == ARG_POINTER_REGNUM
9685 || REGNO (base_reg) == FRAME_POINTER_REGNUM
9686 || REGNO (base_reg) == BP_REG
9687 || REGNO (base_reg) == R13_REG))
9688 disp = const0_rtx;
9689
9690 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
9691 Avoid this by transforming to [%esi+0].
9692 Reload calls address legitimization without cfun defined, so we need
9693 to test cfun for being non-NULL. */
9694 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9695 && base_reg && !index_reg && !disp
9696 && REGNO (base_reg) == SI_REG)
9697 disp = const0_rtx;
9698
9699 /* Special case: encode reg+reg instead of reg*2. */
9700 if (!base && index && scale == 2)
9701 base = index, base_reg = index_reg, scale = 1;
9702
9703 /* Special case: scaling cannot be encoded without base or displacement. */
9704 if (!base && !disp && index && scale != 1)
9705 disp = const0_rtx;
9706
9707 out->base = base;
9708 out->index = index;
9709 out->disp = disp;
9710 out->scale = scale;
9711 out->seg = seg;
9712
9713 return retval;
9714 }
9715 \f
9716 /* Return the cost of the memory address X.
9717 For i386, it is better to use a complex address than to let gcc copy
9718 the address into a reg and make a new pseudo. But not if the address
9719 requires two regs - that would mean more pseudos with longer
9720 lifetimes. */
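/* As an illustrative sketch of the heuristic below: an address built
   only from hard registers, such as 4(%esp), keeps the base cost of 1,
   while an address using two not-yet-allocated pseudos as base and
   index is charged 3, discouraging addresses that tie up two pseudos
   with long lifetimes.  */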
9721 static int
9722 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
9723 {
9724 struct ix86_address parts;
9725 int cost = 1;
9726 int ok = ix86_decompose_address (x, &parts);
9727
9728 gcc_assert (ok);
9729
9730 if (parts.base && SUBREG_P (parts.base))
9731 parts.base = SUBREG_REG (parts.base);
9732 if (parts.index && SUBREG_P (parts.index))
9733 parts.index = SUBREG_REG (parts.index);
9734
9735 /* Attempt to minimize the number of registers in the address by increasing
9736 the address cost for each register used. We don't increase the address cost
9737 for "pic_offset_table_rtx". When a memop using "pic_offset_table_rtx"
9738 is not invariant itself, it most likely means that the base or index is not
9739 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
9740 which is not profitable on x86. */
9741 if (parts.base
9742 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9743 && (current_pass->type == GIMPLE_PASS
9744 || !pic_offset_table_rtx
9745 || !REG_P (parts.base)
9746 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
9747 cost++;
9748
9749 if (parts.index
9750 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9751 && (current_pass->type == GIMPLE_PASS
9752 || !pic_offset_table_rtx
9753 || !REG_P (parts.index)
9754 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
9755 cost++;
9756
9757 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
9758 since its predecode logic can't detect the length of such instructions
9759 and decoding degenerates to the vector decoder. Increase the cost of such
9760 addresses here. The penalty is at least 2 cycles. It may be worthwhile
9761 to split such addresses or even refuse them entirely.
9762
9763 The following addressing modes are affected:
9764 [base+scale*index]
9765 [scale*index+disp]
9766 [base+index]
9767
9768 The first and last cases may be avoidable by explicitly coding a zero
9769 displacement in the memory address, but I don't have an AMD-K6 machine
9770 handy to check this theory. */
9771
9772 if (TARGET_K6
9773 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9774 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9775 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9776 cost += 10;
9777
9778 return cost;
9779 }
9780 \f
9781 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9782 this is used to form addresses of local data when -fPIC is in
9783 use. */
9784
9785 static bool
9786 darwin_local_data_pic (rtx disp)
9787 {
9788 return (GET_CODE (disp) == UNSPEC
9789 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9790 }
9791
9792 /* True if operand X should be loaded from the GOT. */
9793
9794 bool
9795 ix86_force_load_from_GOT_p (rtx x)
9796 {
9797 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
9798 && !TARGET_PECOFF && !TARGET_MACHO
9799 && !flag_pic
9800 && ix86_cmodel != CM_LARGE
9801 && GET_CODE (x) == SYMBOL_REF
9802 && SYMBOL_REF_FUNCTION_P (x)
9803 && (!flag_plt
9804 || (SYMBOL_REF_DECL (x)
9805 && lookup_attribute ("noplt",
9806 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
9807 && !SYMBOL_REF_LOCAL_P (x));
9808 }
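/* For illustration (assuming a 64-bit ELF target): a call to an external
   function carrying the "noplt" attribute, which would normally be
   emitted as
     call foo@PLT
   is emitted instead as
     call *foo@GOTPCREL(%rip)
   i.e. the address is loaded from its GOT slot and the PLT is bypassed.  */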
9809
9810 /* Determine if a given RTX is a valid constant. We already know this
9811 satisfies CONSTANT_P. */
9812
9813 static bool
9814 ix86_legitimate_constant_p (machine_mode mode, rtx x)
9815 {
9816 switch (GET_CODE (x))
9817 {
9818 case CONST:
9819 x = XEXP (x, 0);
9820
9821 if (GET_CODE (x) == PLUS)
9822 {
9823 if (!CONST_INT_P (XEXP (x, 1)))
9824 return false;
9825 x = XEXP (x, 0);
9826 }
9827
9828 if (TARGET_MACHO && darwin_local_data_pic (x))
9829 return true;
9830
9831 /* Only some unspecs are valid as "constants". */
9832 if (GET_CODE (x) == UNSPEC)
9833 switch (XINT (x, 1))
9834 {
9835 case UNSPEC_GOT:
9836 case UNSPEC_GOTOFF:
9837 case UNSPEC_PLTOFF:
9838 return TARGET_64BIT;
9839 case UNSPEC_TPOFF:
9840 case UNSPEC_NTPOFF:
9841 x = XVECEXP (x, 0, 0);
9842 return (GET_CODE (x) == SYMBOL_REF
9843 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9844 case UNSPEC_DTPOFF:
9845 x = XVECEXP (x, 0, 0);
9846 return (GET_CODE (x) == SYMBOL_REF
9847 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9848 default:
9849 return false;
9850 }
9851
9852 /* We must have drilled down to a symbol. */
9853 if (GET_CODE (x) == LABEL_REF)
9854 return true;
9855 if (GET_CODE (x) != SYMBOL_REF)
9856 return false;
9857 /* FALLTHRU */
9858
9859 case SYMBOL_REF:
9860 /* TLS symbols are never valid. */
9861 if (SYMBOL_REF_TLS_MODEL (x))
9862 return false;
9863
9864 /* DLLIMPORT symbols are never valid. */
9865 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9866 && SYMBOL_REF_DLLIMPORT_P (x))
9867 return false;
9868
9869 #if TARGET_MACHO
9870 /* mdynamic-no-pic */
9871 if (MACHO_DYNAMIC_NO_PIC_P)
9872 return machopic_symbol_defined_p (x);
9873 #endif
9874
9875 /* An external function address should be loaded
9876 via the GOT slot to avoid the PLT. */
9877 if (ix86_force_load_from_GOT_p (x))
9878 return false;
9879
9880 break;
9881
9882 CASE_CONST_SCALAR_INT:
9883 switch (mode)
9884 {
9885 case E_TImode:
9886 if (TARGET_64BIT)
9887 return true;
9888 /* FALLTHRU */
9889 case E_OImode:
9890 case E_XImode:
9891 if (!standard_sse_constant_p (x, mode))
9892 return false;
9893 default:
9894 break;
9895 }
9896 break;
9897
9898 case CONST_VECTOR:
9899 if (!standard_sse_constant_p (x, mode))
9900 return false;
9901
9902 default:
9903 break;
9904 }
9905
9906 /* Otherwise we handle everything else in the move patterns. */
9907 return true;
9908 }
9909
9910 /* Determine if it's legal to put X into the constant pool. This
9911 is not possible for the address of thread-local symbols, which
9912 is checked above. */
9913
9914 static bool
9915 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
9916 {
9917 /* We can put any immediate constant in memory. */
9918 switch (GET_CODE (x))
9919 {
9920 CASE_CONST_ANY:
9921 return false;
9922
9923 default:
9924 break;
9925 }
9926
9927 return !ix86_legitimate_constant_p (mode, x);
9928 }
9929
9930 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
9931 otherwise zero. */
9932
9933 static bool
9934 is_imported_p (rtx x)
9935 {
9936 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
9937 || GET_CODE (x) != SYMBOL_REF)
9938 return false;
9939
9940 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
9941 }
9942
9943
9944 /* Nonzero if the constant value X is a legitimate general operand
9945 when generating PIC code. It is given that flag_pic is on and
9946 that X satisfies CONSTANT_P. */
9947
9948 bool
9949 legitimate_pic_operand_p (rtx x)
9950 {
9951 rtx inner;
9952
9953 switch (GET_CODE (x))
9954 {
9955 case CONST:
9956 inner = XEXP (x, 0);
9957 if (GET_CODE (inner) == PLUS
9958 && CONST_INT_P (XEXP (inner, 1)))
9959 inner = XEXP (inner, 0);
9960
9961 /* Only some unspecs are valid as "constants". */
9962 if (GET_CODE (inner) == UNSPEC)
9963 switch (XINT (inner, 1))
9964 {
9965 case UNSPEC_GOT:
9966 case UNSPEC_GOTOFF:
9967 case UNSPEC_PLTOFF:
9968 return TARGET_64BIT;
9969 case UNSPEC_TPOFF:
9970 x = XVECEXP (inner, 0, 0);
9971 return (GET_CODE (x) == SYMBOL_REF
9972 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9973 case UNSPEC_MACHOPIC_OFFSET:
9974 return legitimate_pic_address_disp_p (x);
9975 default:
9976 return false;
9977 }
9978 /* FALLTHRU */
9979
9980 case SYMBOL_REF:
9981 case LABEL_REF:
9982 return legitimate_pic_address_disp_p (x);
9983
9984 default:
9985 return true;
9986 }
9987 }
9988
9989 /* Determine if a given CONST RTX is a valid memory displacement
9990 in PIC mode. */
9991
9992 bool
9993 legitimate_pic_address_disp_p (rtx disp)
9994 {
9995 bool saw_plus;
9996
9997 /* In 64bit mode we can allow direct addresses of symbols and labels
9998 when they are not dynamic symbols. */
9999 if (TARGET_64BIT)
10000 {
10001 rtx op0 = disp, op1;
10002
10003 switch (GET_CODE (disp))
10004 {
10005 case LABEL_REF:
10006 return true;
10007
10008 case CONST:
10009 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10010 break;
10011 op0 = XEXP (XEXP (disp, 0), 0);
10012 op1 = XEXP (XEXP (disp, 0), 1);
10013 if (!CONST_INT_P (op1))
10014 break;
10015 if (GET_CODE (op0) == UNSPEC
10016 && (XINT (op0, 1) == UNSPEC_DTPOFF
10017 || XINT (op0, 1) == UNSPEC_NTPOFF)
10018 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10019 return true;
10020 if (INTVAL (op1) >= 16*1024*1024
10021 || INTVAL (op1) < -16*1024*1024)
10022 break;
10023 if (GET_CODE (op0) == LABEL_REF)
10024 return true;
10025 if (GET_CODE (op0) == CONST
10026 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10027 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10028 return true;
10029 if (GET_CODE (op0) == UNSPEC
10030 && XINT (op0, 1) == UNSPEC_PCREL)
10031 return true;
10032 if (GET_CODE (op0) != SYMBOL_REF)
10033 break;
10034 /* FALLTHRU */
10035
10036 case SYMBOL_REF:
10037 /* TLS references should always be enclosed in UNSPEC.
10038 A dllimported symbol always needs to be resolved. */
10039 if (SYMBOL_REF_TLS_MODEL (op0)
10040 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10041 return false;
10042
10043 if (TARGET_PECOFF)
10044 {
10045 if (is_imported_p (op0))
10046 return true;
10047
10048 if (SYMBOL_REF_FAR_ADDR_P (op0)
10049 || !SYMBOL_REF_LOCAL_P (op0))
10050 break;
10051
10052 /* Function symbols need to be resolved only for
10053 the large model.
10054 For the small model we don't need to resolve anything
10055 here. */
10056 if ((ix86_cmodel != CM_LARGE_PIC
10057 && SYMBOL_REF_FUNCTION_P (op0))
10058 || ix86_cmodel == CM_SMALL_PIC)
10059 return true;
10060 /* Non-external symbols don't need to be resolved for
10061 the large and medium models. */
10062 if ((ix86_cmodel == CM_LARGE_PIC
10063 || ix86_cmodel == CM_MEDIUM_PIC)
10064 && !SYMBOL_REF_EXTERNAL_P (op0))
10065 return true;
10066 }
10067 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10068 && (SYMBOL_REF_LOCAL_P (op0)
10069 || (HAVE_LD_PIE_COPYRELOC
10070 && flag_pie
10071 && !SYMBOL_REF_WEAK (op0)
10072 && !SYMBOL_REF_FUNCTION_P (op0)))
10073 && ix86_cmodel != CM_LARGE_PIC)
10074 return true;
10075 break;
10076
10077 default:
10078 break;
10079 }
10080 }
10081 if (GET_CODE (disp) != CONST)
10082 return false;
10083 disp = XEXP (disp, 0);
10084
10085 if (TARGET_64BIT)
10086 {
10087 /* It is not safe to allow PLUS expressions here; that would exceed the
10088 allowed distance of GOT table references. We should not need these anyway. */
10089 if (GET_CODE (disp) != UNSPEC
10090 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10091 && XINT (disp, 1) != UNSPEC_GOTOFF
10092 && XINT (disp, 1) != UNSPEC_PCREL
10093 && XINT (disp, 1) != UNSPEC_PLTOFF))
10094 return false;
10095
10096 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10097 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10098 return false;
10099 return true;
10100 }
10101
10102 saw_plus = false;
10103 if (GET_CODE (disp) == PLUS)
10104 {
10105 if (!CONST_INT_P (XEXP (disp, 1)))
10106 return false;
10107 disp = XEXP (disp, 0);
10108 saw_plus = true;
10109 }
10110
10111 if (TARGET_MACHO && darwin_local_data_pic (disp))
10112 return true;
10113
10114 if (GET_CODE (disp) != UNSPEC)
10115 return false;
10116
10117 switch (XINT (disp, 1))
10118 {
10119 case UNSPEC_GOT:
10120 if (saw_plus)
10121 return false;
10122 /* We need to check for both symbols and labels because VxWorks loads
10123 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10124 details. */
10125 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10126 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10127 case UNSPEC_GOTOFF:
10128 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10129 While the ABI also specifies a 32bit relocation, we don't produce it in
10130 the small PIC model at all. */
10131 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10132 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10133 && !TARGET_64BIT)
10134 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10135 return false;
10136 case UNSPEC_GOTTPOFF:
10137 case UNSPEC_GOTNTPOFF:
10138 case UNSPEC_INDNTPOFF:
10139 if (saw_plus)
10140 return false;
10141 disp = XVECEXP (disp, 0, 0);
10142 return (GET_CODE (disp) == SYMBOL_REF
10143 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10144 case UNSPEC_NTPOFF:
10145 disp = XVECEXP (disp, 0, 0);
10146 return (GET_CODE (disp) == SYMBOL_REF
10147 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10148 case UNSPEC_DTPOFF:
10149 disp = XVECEXP (disp, 0, 0);
10150 return (GET_CODE (disp) == SYMBOL_REF
10151 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10152 }
10153
10154 return false;
10155 }
10156
10157 /* Determine if OP is a suitable RTX for an address register.
10158 Return the naked register if a register or a register subreg is
10159 found, otherwise return NULL_RTX. */
10160
10161 static rtx
10162 ix86_validate_address_register (rtx op)
10163 {
10164 machine_mode mode = GET_MODE (op);
10165
10166 /* Only SImode or DImode registers can form the address. */
10167 if (mode != SImode && mode != DImode)
10168 return NULL_RTX;
10169
10170 if (REG_P (op))
10171 return op;
10172 else if (SUBREG_P (op))
10173 {
10174 rtx reg = SUBREG_REG (op);
10175
10176 if (!REG_P (reg))
10177 return NULL_RTX;
10178
10179 mode = GET_MODE (reg);
10180
10181 /* Don't allow SUBREGs that span more than a word. It can
10182 lead to spill failures when the register is one word out
10183 of a two word structure. */
10184 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10185 return NULL_RTX;
10186
10187 /* Allow only SUBREGs of non-eliminable hard registers. */
10188 if (register_no_elim_operand (reg, mode))
10189 return reg;
10190 }
10191
10192 /* Op is not a register. */
10193 return NULL_RTX;
10194 }
10195
10196 /* Recognizes RTL expressions that are valid memory addresses for an
10197 instruction. The MODE argument is the machine mode for the MEM
10198 expression that wants to use this address.
10199
10200 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10201 convert common non-canonical forms to canonical form so that they will
10202 be recognized. */
10203
10204 static bool
10205 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10206 {
10207 struct ix86_address parts;
10208 rtx base, index, disp;
10209 HOST_WIDE_INT scale;
10210 addr_space_t seg;
10211
10212 if (ix86_decompose_address (addr, &parts) <= 0)
10213 /* Decomposition failed. */
10214 return false;
10215
10216 base = parts.base;
10217 index = parts.index;
10218 disp = parts.disp;
10219 scale = parts.scale;
10220 seg = parts.seg;
10221
10222 /* Validate base register. */
10223 if (base)
10224 {
10225 rtx reg = ix86_validate_address_register (base);
10226
10227 if (reg == NULL_RTX)
10228 return false;
10229
10230 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10231 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10232 /* Base is not valid. */
10233 return false;
10234 }
10235
10236 /* Validate index register. */
10237 if (index)
10238 {
10239 rtx reg = ix86_validate_address_register (index);
10240
10241 if (reg == NULL_RTX)
10242 return false;
10243
10244 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10245 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10246 /* Index is not valid. */
10247 return false;
10248 }
10249
10250 /* Index and base should have the same mode. */
10251 if (base && index
10252 && GET_MODE (base) != GET_MODE (index))
10253 return false;
10254
10255 /* Address override works only on the (%reg) part of %fs:(%reg). */
10256 if (seg != ADDR_SPACE_GENERIC
10257 && ((base && GET_MODE (base) != word_mode)
10258 || (index && GET_MODE (index) != word_mode)))
10259 return false;
10260
10261 /* Validate scale factor. */
10262 if (scale != 1)
10263 {
10264 if (!index)
10265 /* Scale without index. */
10266 return false;
10267
10268 if (scale != 2 && scale != 4 && scale != 8)
10269 /* Scale is not a valid multiplier. */
10270 return false;
10271 }
10272
10273 /* Validate displacement. */
10274 if (disp)
10275 {
10276 if (GET_CODE (disp) == CONST
10277 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10278 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10279 switch (XINT (XEXP (disp, 0), 1))
10280 {
10281 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
10282 when used. While the ABI also specifies 32bit relocations, we
10283 don't produce them at all and use IP-relative addressing instead.
10284 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
10285 should be loaded via the GOT. */
10286 case UNSPEC_GOT:
10287 if (!TARGET_64BIT
10288 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10289 goto is_legitimate_pic;
10290 /* FALLTHRU */
10291 case UNSPEC_GOTOFF:
10292 gcc_assert (flag_pic);
10293 if (!TARGET_64BIT)
10294 goto is_legitimate_pic;
10295
10296 /* 64bit address unspec. */
10297 return false;
10298
10299 case UNSPEC_GOTPCREL:
10300 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10301 goto is_legitimate_pic;
10302 /* FALLTHRU */
10303 case UNSPEC_PCREL:
10304 gcc_assert (flag_pic);
10305 goto is_legitimate_pic;
10306
10307 case UNSPEC_GOTTPOFF:
10308 case UNSPEC_GOTNTPOFF:
10309 case UNSPEC_INDNTPOFF:
10310 case UNSPEC_NTPOFF:
10311 case UNSPEC_DTPOFF:
10312 break;
10313
10314 default:
10315 /* Invalid address unspec. */
10316 return false;
10317 }
10318
10319 else if (SYMBOLIC_CONST (disp)
10320 && (flag_pic
10321 || (TARGET_MACHO
10322 #if TARGET_MACHO
10323 && MACHOPIC_INDIRECT
10324 && !machopic_operand_p (disp)
10325 #endif
10326 )))
10327 {
10328
10329 is_legitimate_pic:
10330 if (TARGET_64BIT && (index || base))
10331 {
10332 /* foo@dtpoff(%rX) is ok. */
10333 if (GET_CODE (disp) != CONST
10334 || GET_CODE (XEXP (disp, 0)) != PLUS
10335 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10336 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10337 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10338 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10339 /* Non-constant pic memory reference. */
10340 return false;
10341 }
10342 else if ((!TARGET_MACHO || flag_pic)
10343 && ! legitimate_pic_address_disp_p (disp))
10344 /* Displacement is an invalid pic construct. */
10345 return false;
10346 #if TARGET_MACHO
10347 else if (MACHO_DYNAMIC_NO_PIC_P
10348 && !ix86_legitimate_constant_p (Pmode, disp))
10349 /* Displacement must be referenced via non_lazy_pointer. */
10350 return false;
10351 #endif
10352
10353 /* This code used to verify that a symbolic pic displacement
10354 includes the pic_offset_table_rtx register.
10355
10356 While this is a good idea, unfortunately these constructs may
10357 be created by the "adds using lea" optimization for incorrect
10358 code like:
10359
10360 int a;
10361 int foo(int i)
10362 {
10363 return *(&a+i);
10364 }
10365
10366 This code is nonsensical, but results in addressing the
10367 GOT table with a pic_offset_table_rtx base. We can't
10368 just refuse it easily, since it gets matched by the
10369 "addsi3" pattern, which later gets split to lea when the
10370 output register differs from the input. While this
10371 could be handled by a separate addsi pattern for this case
10372 that never results in lea, disabling this test seems to be
10373 the easier and correct fix for the crash. */
10374 }
10375 else if (GET_CODE (disp) != LABEL_REF
10376 && !CONST_INT_P (disp)
10377 && (GET_CODE (disp) != CONST
10378 || !ix86_legitimate_constant_p (Pmode, disp))
10379 && (GET_CODE (disp) != SYMBOL_REF
10380 || !ix86_legitimate_constant_p (Pmode, disp)))
10381 /* Displacement is not constant. */
10382 return false;
10383 else if (TARGET_64BIT
10384 && !x86_64_immediate_operand (disp, VOIDmode))
10385 /* Displacement is out of range. */
10386 return false;
10387 /* In x32 mode, constant addresses are sign extended to 64bit, so
10388 we have to reject addresses from 0x80000000 to 0xffffffff. */
10389 else if (TARGET_X32 && !(index || base)
10390 && CONST_INT_P (disp)
10391 && val_signbit_known_set_p (SImode, INTVAL (disp)))
10392 return false;
10393 }
10394
10395 /* Everything looks valid. */
10396 return true;
10397 }
10398
10399 /* Determine if a given RTX is a valid constant address. */
10400
10401 bool
10402 constant_address_p (rtx x)
10403 {
10404 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10405 }
10406 \f
10407 /* Return a unique alias set for the GOT. */
10408
10409 alias_set_type
10410 ix86_GOT_alias_set (void)
10411 {
10412 static alias_set_type set = -1;
10413 if (set == -1)
10414 set = new_alias_set ();
10415 return set;
10416 }
10417
10418 /* Return a legitimate reference for ORIG (an address) using the
10419 register REG. If REG is 0, a new pseudo is generated.
10420
10421 There are two types of references that must be handled:
10422
10423 1. Global data references must load the address from the GOT, via
10424 the PIC reg. An insn is emitted to do this load, and the reg is
10425 returned.
10426
10427 2. Static data references, constant pool addresses, and code labels
10428 compute the address as an offset from the GOT, whose base is in
10429 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10430 differentiate them from global data objects. The returned
10431 address is the PIC reg + an unspec constant.
10432
10433 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10434 reg also appears in the address. */
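/* For illustration (32-bit code, assuming %ebx holds the PIC register):
   case 1 typically becomes
     movl foo@GOT(%ebx), %reg     ; load the address from the GOT
   while case 2 becomes
     leal bar@GOTOFF(%ebx), %reg  ; PIC reg plus a constant offset
   the 64-bit small PIC model uses foo@GOTPCREL(%rip) loads instead.  */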
10435
10436 rtx
10437 legitimize_pic_address (rtx orig, rtx reg)
10438 {
10439 rtx addr = orig;
10440 rtx new_rtx = orig;
10441
10442 #if TARGET_MACHO
10443 if (TARGET_MACHO && !TARGET_64BIT)
10444 {
10445 if (reg == 0)
10446 reg = gen_reg_rtx (Pmode);
10447 /* Use the generic Mach-O PIC machinery. */
10448 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10449 }
10450 #endif
10451
10452 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10453 {
10454 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10455 if (tmp)
10456 return tmp;
10457 }
10458
10459 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10460 new_rtx = addr;
10461 else if ((!TARGET_64BIT
10462 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
10463 && !TARGET_PECOFF
10464 && gotoff_operand (addr, Pmode))
10465 {
10466 /* This symbol may be referenced via a displacement
10467 from the PIC base address (@GOTOFF). */
10468 if (GET_CODE (addr) == CONST)
10469 addr = XEXP (addr, 0);
10470
10471 if (GET_CODE (addr) == PLUS)
10472 {
10473 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10474 UNSPEC_GOTOFF);
10475 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10476 }
10477 else
10478 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10479
10480 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10481
10482 if (TARGET_64BIT)
10483 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10484
10485 if (reg != 0)
10486 {
10487 gcc_assert (REG_P (reg));
10488 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
10489 new_rtx, reg, 1, OPTAB_DIRECT);
10490 }
10491 else
10492 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10493 }
10494 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10495 /* We can't use @GOTOFF for text labels
10496 on VxWorks, see gotoff_operand. */
10497 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10498 {
10499 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10500 if (tmp)
10501 return tmp;
10502
10503 /* For x64 PE-COFF there is no GOT table,
10504 so we use the address directly. */
10505 if (TARGET_64BIT && TARGET_PECOFF)
10506 {
10507 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
10508 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10509 }
10510 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10511 {
10512 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
10513 UNSPEC_GOTPCREL);
10514 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10515 new_rtx = gen_const_mem (Pmode, new_rtx);
10516 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10517 }
10518 else
10519 {
10520 /* This symbol must be referenced via a load
10521 from the Global Offset Table (@GOT). */
10522 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10523 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10524 if (TARGET_64BIT)
10525 new_rtx = force_reg (Pmode, new_rtx);
10526 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10527 new_rtx = gen_const_mem (Pmode, new_rtx);
10528 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10529 }
10530
10531 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10532 }
10533 else
10534 {
10535 if (CONST_INT_P (addr)
10536 && !x86_64_immediate_operand (addr, VOIDmode))
10537 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
10538 else if (GET_CODE (addr) == CONST)
10539 {
10540 addr = XEXP (addr, 0);
10541
10542 /* We must match stuff we generated before. Assume the only
10543 unspecs that can get here are ours. Not that we could do
10544 anything with them anyway.... */
10545 if (GET_CODE (addr) == UNSPEC
10546 || (GET_CODE (addr) == PLUS
10547 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10548 return orig;
10549 gcc_assert (GET_CODE (addr) == PLUS);
10550 }
10551
10552 if (GET_CODE (addr) == PLUS)
10553 {
10554 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10555
10556 /* Check first to see if this is a constant
10557 offset from a @GOTOFF symbol reference. */
10558 if (!TARGET_PECOFF
10559 && gotoff_operand (op0, Pmode)
10560 && CONST_INT_P (op1))
10561 {
10562 if (!TARGET_64BIT)
10563 {
10564 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10565 UNSPEC_GOTOFF);
10566 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10567 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10568
10569 if (reg != 0)
10570 {
10571 gcc_assert (REG_P (reg));
10572 new_rtx = expand_simple_binop (Pmode, PLUS,
10573 pic_offset_table_rtx,
10574 new_rtx, reg, 1,
10575 OPTAB_DIRECT);
10576 }
10577 else
10578 new_rtx
10579 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10580 }
10581 else
10582 {
10583 if (INTVAL (op1) < -16*1024*1024
10584 || INTVAL (op1) >= 16*1024*1024)
10585 {
10586 if (!x86_64_immediate_operand (op1, Pmode))
10587 op1 = force_reg (Pmode, op1);
10588
10589 new_rtx
10590 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10591 }
10592 }
10593 }
10594 else
10595 {
10596 rtx base = legitimize_pic_address (op0, reg);
10597 machine_mode mode = GET_MODE (base);
10598 new_rtx
10599 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
10600
10601 if (CONST_INT_P (new_rtx))
10602 {
10603 if (INTVAL (new_rtx) < -16*1024*1024
10604 || INTVAL (new_rtx) >= 16*1024*1024)
10605 {
10606 if (!x86_64_immediate_operand (new_rtx, mode))
10607 new_rtx = force_reg (mode, new_rtx);
10608
10609 new_rtx
10610 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
10611 }
10612 else
10613 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
10614 }
10615 else
10616 {
10617 /* For %rip addressing, we have to use
10618 just disp32, with neither base nor index. */
10619 if (TARGET_64BIT
10620 && (GET_CODE (base) == SYMBOL_REF
10621 || GET_CODE (base) == LABEL_REF))
10622 base = force_reg (mode, base);
10623 if (GET_CODE (new_rtx) == PLUS
10624 && CONSTANT_P (XEXP (new_rtx, 1)))
10625 {
10626 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
10627 new_rtx = XEXP (new_rtx, 1);
10628 }
10629 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
10630 }
10631 }
10632 }
10633 }
10634 return new_rtx;
10635 }
10636 \f
10637 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10638
10639 static rtx
10640 get_thread_pointer (machine_mode tp_mode, bool to_reg)
10641 {
10642 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10643
10644 if (GET_MODE (tp) != tp_mode)
10645 {
10646 gcc_assert (GET_MODE (tp) == SImode);
10647 gcc_assert (tp_mode == DImode);
10648
10649 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
10650 }
10651
10652 if (to_reg)
10653 tp = copy_to_mode_reg (tp_mode, tp);
10654
10655 return tp;
10656 }
10657
10658 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10659
10660 static GTY(()) rtx ix86_tls_symbol;
10661
10662 static rtx
10663 ix86_tls_get_addr (void)
10664 {
10665 if (!ix86_tls_symbol)
10666 {
10667 const char *sym
10668 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
10669 ? "___tls_get_addr" : "__tls_get_addr");
10670
10671 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
10672 }
10673
10674 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
10675 {
10676 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
10677 UNSPEC_PLTOFF);
10678 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10679 gen_rtx_CONST (Pmode, unspec));
10680 }
10681
10682 return ix86_tls_symbol;
10683 }
10684
10685 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
10686
10687 static GTY(()) rtx ix86_tls_module_base_symbol;
10688
10689 rtx
10690 ix86_tls_module_base (void)
10691 {
10692 if (!ix86_tls_module_base_symbol)
10693 {
10694 ix86_tls_module_base_symbol
10695 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
10696
10697 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
10698 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
10699 }
10700
10701 return ix86_tls_module_base_symbol;
10702 }
10703
10704 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10705 false if we expect this to be used for a memory address and true if
10706 we expect to load the address into a register. */
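/* As a rough illustration of the models handled below (simplified
   64-bit GNU/Linux sequences; the exact forms depend on the ABI, -fpic
   and TARGET_GNU2_TLS):
     global/local dynamic: lea x@tlsgd(%rip), %rdi; call __tls_get_addr
     initial exec:         mov x@gottpoff(%rip), %rax; mov %fs:(%rax), ...
     local exec:           mov %fs:x@tpoff, ...  */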
10707
10708 rtx
10709 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
10710 {
10711 rtx dest, base, off;
10712 rtx pic = NULL_RTX, tp = NULL_RTX;
10713 machine_mode tp_mode = Pmode;
10714 int type;
10715
10716 /* Fall back to the global dynamic model if the toolchain cannot support
10717 local dynamic. */
10718 if (TARGET_SUN_TLS && !TARGET_64BIT
10719 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
10720 && model == TLS_MODEL_LOCAL_DYNAMIC)
10721 model = TLS_MODEL_GLOBAL_DYNAMIC;
10722
10723 switch (model)
10724 {
10725 case TLS_MODEL_GLOBAL_DYNAMIC:
10726 dest = gen_reg_rtx (Pmode);
10727
10728 if (!TARGET_64BIT)
10729 {
10730 if (flag_pic && !TARGET_PECOFF)
10731 pic = pic_offset_table_rtx;
10732 else
10733 {
10734 pic = gen_reg_rtx (Pmode);
10735 emit_insn (gen_set_got (pic));
10736 }
10737 }
10738
10739 if (TARGET_GNU2_TLS)
10740 {
10741 if (TARGET_64BIT)
10742 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
10743 else
10744 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
10745
10746 tp = get_thread_pointer (Pmode, true);
10747 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10748
10749 if (GET_MODE (x) != Pmode)
10750 x = gen_rtx_ZERO_EXTEND (Pmode, x);
10751
10752 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
10753 }
10754 else
10755 {
10756 rtx caddr = ix86_tls_get_addr ();
10757
10758 if (TARGET_64BIT)
10759 {
10760 rtx rax = gen_rtx_REG (Pmode, AX_REG);
10761 rtx_insn *insns;
10762
10763 start_sequence ();
10764 emit_call_insn
10765 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
10766 insns = get_insns ();
10767 end_sequence ();
10768
10769 if (GET_MODE (x) != Pmode)
10770 x = gen_rtx_ZERO_EXTEND (Pmode, x);
10771
10772 RTL_CONST_CALL_P (insns) = 1;
10773 emit_libcall_block (insns, dest, rax, x);
10774 }
10775 else
10776 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
10777 }
10778 break;
10779
10780 case TLS_MODEL_LOCAL_DYNAMIC:
10781 base = gen_reg_rtx (Pmode);
10782
10783 if (!TARGET_64BIT)
10784 {
10785 if (flag_pic)
10786 pic = pic_offset_table_rtx;
10787 else
10788 {
10789 pic = gen_reg_rtx (Pmode);
10790 emit_insn (gen_set_got (pic));
10791 }
10792 }
10793
10794 if (TARGET_GNU2_TLS)
10795 {
10796 rtx tmp = ix86_tls_module_base ();
10797
10798 if (TARGET_64BIT)
10799 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
10800 else
10801 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
10802
10803 tp = get_thread_pointer (Pmode, true);
10804 set_unique_reg_note (get_last_insn (), REG_EQUAL,
10805 gen_rtx_MINUS (Pmode, tmp, tp));
10806 }
10807 else
10808 {
10809 rtx caddr = ix86_tls_get_addr ();
10810
10811 if (TARGET_64BIT)
10812 {
10813 rtx rax = gen_rtx_REG (Pmode, AX_REG);
10814 rtx_insn *insns;
10815 rtx eqv;
10816
10817 start_sequence ();
10818 emit_call_insn
10819 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
10820 insns = get_insns ();
10821 end_sequence ();
10822
10823 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
10824 share the LD_BASE result with other LD model accesses. */
10825 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10826 UNSPEC_TLS_LD_BASE);
10827
10828 RTL_CONST_CALL_P (insns) = 1;
10829 emit_libcall_block (insns, base, rax, eqv);
10830 }
10831 else
10832 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
10833 }
10834
10835 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10836 off = gen_rtx_CONST (Pmode, off);
10837
10838 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10839
10840 if (TARGET_GNU2_TLS)
10841 {
10842 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10843
10844 if (GET_MODE (x) != Pmode)
10845 x = gen_rtx_ZERO_EXTEND (Pmode, x);
10846
10847 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
10848 }
10849 break;
10850
10851 case TLS_MODEL_INITIAL_EXEC:
10852 if (TARGET_64BIT)
10853 {
10854 if (TARGET_SUN_TLS && !TARGET_X32)
10855 {
10856 /* The Sun linker took the AMD64 TLS spec literally
10857 and can only handle %rax as the destination of the
10858 initial-exec code sequence. */
10859
10860 dest = gen_reg_rtx (DImode);
10861 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
10862 return dest;
10863 }
10864
10865 /* Generate DImode references to avoid %fs:(%reg32)
10866 problems and the linker IE->LE relaxation bug. */
10867 tp_mode = DImode;
10868 pic = NULL;
10869 type = UNSPEC_GOTNTPOFF;
10870 }
10871 else if (flag_pic)
10872 {
10873 pic = pic_offset_table_rtx;
10874 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10875 }
10876 else if (!TARGET_ANY_GNU_TLS)
10877 {
10878 pic = gen_reg_rtx (Pmode);
10879 emit_insn (gen_set_got (pic));
10880 type = UNSPEC_GOTTPOFF;
10881 }
10882 else
10883 {
10884 pic = NULL;
10885 type = UNSPEC_INDNTPOFF;
10886 }
10887
10888 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
10889 off = gen_rtx_CONST (tp_mode, off);
10890 if (pic)
10891 off = gen_rtx_PLUS (tp_mode, pic, off);
10892 off = gen_const_mem (tp_mode, off);
10893 set_mem_alias_set (off, ix86_GOT_alias_set ());
10894
10895 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10896 {
10897 base = get_thread_pointer (tp_mode,
10898 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10899 off = force_reg (tp_mode, off);
10900 dest = gen_rtx_PLUS (tp_mode, base, off);
10901 if (tp_mode != Pmode)
10902 dest = convert_to_mode (Pmode, dest, 1);
10903 }
10904 else
10905 {
10906 base = get_thread_pointer (Pmode, true);
10907 dest = gen_reg_rtx (Pmode);
10908 emit_insn (gen_sub3_insn (dest, base, off));
10909 }
10910 break;
10911
10912 case TLS_MODEL_LOCAL_EXEC:
10913 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10914 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10915 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10916 off = gen_rtx_CONST (Pmode, off);
10917
10918 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10919 {
10920 base = get_thread_pointer (Pmode,
10921 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10922 return gen_rtx_PLUS (Pmode, base, off);
10923 }
10924 else
10925 {
10926 base = get_thread_pointer (Pmode, true);
10927 dest = gen_reg_rtx (Pmode);
10928 emit_insn (gen_sub3_insn (dest, base, off));
10929 }
10930 break;
10931
10932 default:
10933 gcc_unreachable ();
10934 }
10935
10936 return dest;
10937 }
10938
10939 /* Return true if OP refers to a TLS address. */
10940 bool
10941 ix86_tls_address_pattern_p (rtx op)
10942 {
10943 subrtx_var_iterator::array_type array;
10944 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
10945 {
10946 rtx op = *iter;
10947 if (MEM_P (op))
10948 {
10949 rtx *x = &XEXP (op, 0);
10950 while (GET_CODE (*x) == PLUS)
10951 {
10952 int i;
10953 for (i = 0; i < 2; i++)
10954 {
10955 rtx u = XEXP (*x, i);
10956 if (GET_CODE (u) == ZERO_EXTEND)
10957 u = XEXP (u, 0);
10958 if (GET_CODE (u) == UNSPEC
10959 && XINT (u, 1) == UNSPEC_TP)
10960 return true;
10961 }
10962 x = &XEXP (*x, 0);
10963 }
10964
10965 iter.skip_subrtxes ();
10966 }
10967 }
10968
10969 return false;
10970 }
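
/* For illustration: the walk above matches addresses such as

     (plus (unspec [(const_int 0)] UNSPEC_TP)
           (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)))

   i.e. thread pointer + x@tpoff, the shape legitimize_tls_address
   produces for local-exec (and, with the GOT load forced into a
   register, initial-exec) accesses when TARGET_TLS_DIRECT_SEG_REFS
   is in effect.  */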
10971
10972 /* Rewrite *LOC so that it refers to a default TLS address space. */
10973 void
10974 ix86_rewrite_tls_address_1 (rtx *loc)
10975 {
10976 subrtx_ptr_iterator::array_type array;
10977 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
10978 {
10979 rtx *loc = *iter;
10980 if (MEM_P (*loc))
10981 {
10982 rtx addr = XEXP (*loc, 0);
10983 rtx *x = &addr;
10984 while (GET_CODE (*x) == PLUS)
10985 {
10986 int i;
10987 for (i = 0; i < 2; i++)
10988 {
10989 rtx u = XEXP (*x, i);
10990 if (GET_CODE (u) == ZERO_EXTEND)
10991 u = XEXP (u, 0);
10992 if (GET_CODE (u) == UNSPEC
10993 && XINT (u, 1) == UNSPEC_TP)
10994 {
10995 addr_space_t as = DEFAULT_TLS_SEG_REG;
10996
10997 *x = XEXP (*x, 1 - i);
10998
10999 *loc = replace_equiv_address_nv (*loc, addr, true);
11000 set_mem_addr_space (*loc, as);
11001 return;
11002 }
11003 }
11004 x = &XEXP (*x, 0);
11005 }
11006
11007 iter.skip_subrtxes ();
11008 }
11009 }
11010 }
11011
11012 /* Rewrite instruction pattern involving TLS address
11013 so that it refers to a default TLS address space. */
11014 rtx
11015 ix86_rewrite_tls_address (rtx pattern)
11016 {
11017 pattern = copy_insn (pattern);
11018 ix86_rewrite_tls_address_1 (&pattern);
11019 return pattern;
11020 }
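
/* For illustration: given a MEM whose address is
   (plus (unspec [(const_int 0)] UNSPEC_TP) (reg)), the rewrite above
   drops the UNSPEC_TP term and re-creates the MEM with
   DEFAULT_TLS_SEG_REG as its address space, so the operand is later
   printed with an %fs:/%gs: segment override instead of an explicit
   thread-pointer addend.  */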
11021
11022 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11023 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11024 unique refptr-DECL symbol corresponding to symbol DECL. */
11025
11026 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11027 {
11028 static inline hashval_t hash (tree_map *m) { return m->hash; }
11029 static inline bool
11030 equal (tree_map *a, tree_map *b)
11031 {
11032 return a->base.from == b->base.from;
11033 }
11034
11035 static int
11036 keep_cache_entry (tree_map *&m)
11037 {
11038 return ggc_marked_p (m->base.from);
11039 }
11040 };
11041
11042 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11043
11044 static tree
11045 get_dllimport_decl (tree decl, bool beimport)
11046 {
11047 struct tree_map *h, in;
11048 const char *name;
11049 const char *prefix;
11050 size_t namelen, prefixlen;
11051 char *imp_name;
11052 tree to;
11053 rtx rtl;
11054
11055 if (!dllimport_map)
11056 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11057
11058 in.hash = htab_hash_pointer (decl);
11059 in.base.from = decl;
11060 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11061 h = *loc;
11062 if (h)
11063 return h->to;
11064
11065 *loc = h = ggc_alloc<tree_map> ();
11066 h->hash = in.hash;
11067 h->base.from = decl;
11068 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11069 VAR_DECL, NULL, ptr_type_node);
11070 DECL_ARTIFICIAL (to) = 1;
11071 DECL_IGNORED_P (to) = 1;
11072 DECL_EXTERNAL (to) = 1;
11073 TREE_READONLY (to) = 1;
11074
11075 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11076 name = targetm.strip_name_encoding (name);
11077 if (beimport)
11078 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11079 ? "*__imp_" : "*__imp__";
11080 else
11081 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11082 namelen = strlen (name);
11083 prefixlen = strlen (prefix);
11084 imp_name = (char *) alloca (namelen + prefixlen + 1);
11085 memcpy (imp_name, prefix, prefixlen);
11086 memcpy (imp_name + prefixlen, name, namelen + 1);
11087
11088 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11089 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11090 SET_SYMBOL_REF_DECL (rtl, to);
11091 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11092 if (!beimport)
11093 {
11094 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11095 #ifdef SUB_TARGET_RECORD_STUB
11096 SUB_TARGET_RECORD_STUB (name);
11097 #endif
11098 }
11099
11100 rtl = gen_const_mem (Pmode, rtl);
11101 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11102
11103 SET_DECL_RTL (to, rtl);
11104 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11105
11106 return to;
11107 }
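
/* For illustration: for a dllimported symbol "foo", the decl built
   above has DECL_RTL (mem (symbol_ref "*__imp_foo")) - or
   "*__imp__foo" when user labels carry a leading underscore - so
   references to foo are loaded indirectly through the import table
   slot; with BEIMPORT false a "refptr." stub symbol is created
   instead.  */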
11108
11109 /* Expand SYMBOL into its corresponding far-address symbol.
11110 WANT_REG is true if we require the result be a register. */
11111
11112 static rtx
11113 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11114 {
11115 tree imp_decl;
11116 rtx x;
11117
11118 gcc_assert (SYMBOL_REF_DECL (symbol));
11119 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11120
11121 x = DECL_RTL (imp_decl);
11122 if (want_reg)
11123 x = force_reg (Pmode, x);
11124 return x;
11125 }
11126
11127 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11128 true if we require the result be a register. */
11129
11130 static rtx
11131 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11132 {
11133 tree imp_decl;
11134 rtx x;
11135
11136 gcc_assert (SYMBOL_REF_DECL (symbol));
11137 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11138
11139 x = DECL_RTL (imp_decl);
11140 if (want_reg)
11141 x = force_reg (Pmode, x);
11142 return x;
11143 }
11144
11145 /* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
11146 is true if we require the result to be a register. */
11147
11148 rtx
11149 legitimize_pe_coff_symbol (rtx addr, bool inreg)
11150 {
11151 if (!TARGET_PECOFF)
11152 return NULL_RTX;
11153
11154 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11155 {
11156 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11157 return legitimize_dllimport_symbol (addr, inreg);
11158 if (GET_CODE (addr) == CONST
11159 && GET_CODE (XEXP (addr, 0)) == PLUS
11160 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11161 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11162 {
11163 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11164 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11165 }
11166 }
11167
11168 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11169 return NULL_RTX;
11170 if (GET_CODE (addr) == SYMBOL_REF
11171 && !is_imported_p (addr)
11172 && SYMBOL_REF_EXTERNAL_P (addr)
11173 && SYMBOL_REF_DECL (addr))
11174 return legitimize_pe_coff_extern_decl (addr, inreg);
11175
11176 if (GET_CODE (addr) == CONST
11177 && GET_CODE (XEXP (addr, 0)) == PLUS
11178 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11179 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11180 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11181 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11182 {
11183 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11184 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11185 }
11186 return NULL_RTX;
11187 }
11188
11189 /* Try machine-dependent ways of modifying an illegitimate address
11190 to be legitimate. If we find one, return the new, valid address.
11191 This macro is used in only one place: `memory_address' in explow.c.
11192
11193 OLDX is the address as it was before break_out_memory_refs was called.
11194 In some cases it is useful to look at this to decide what needs to be done.
11195
11196 It is always safe for this macro to do nothing. It exists to recognize
11197 opportunities to optimize the output.
11198
11199 For the 80386, we handle X+REG by loading X into a register R and
11200 using R+REG. R will go in a general reg and indexing will be used.
11201 However, if REG is a broken-out memory address or multiplication,
11202 nothing needs to be done because REG can certainly go in a general reg.
11203
11204 When -fpic is used, special handling is needed for symbolic references.
11205 See comments by legitimize_pic_address in i386.c for details. */
11206
11207 static rtx
11208 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
11209 {
11210 bool changed = false;
11211 unsigned log;
11212
11213 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11214 if (log)
11215 return legitimize_tls_address (x, (enum tls_model) log, false);
11216 if (GET_CODE (x) == CONST
11217 && GET_CODE (XEXP (x, 0)) == PLUS
11218 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11219 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11220 {
11221 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11222 (enum tls_model) log, false);
11223 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11224 }
11225
11226 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11227 {
11228 rtx tmp = legitimize_pe_coff_symbol (x, true);
11229 if (tmp)
11230 return tmp;
11231 }
11232
11233 if (flag_pic && SYMBOLIC_CONST (x))
11234 return legitimize_pic_address (x, 0);
11235
11236 #if TARGET_MACHO
11237 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
11238 return machopic_indirect_data_reference (x, 0);
11239 #endif
11240
11241 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11242 if (GET_CODE (x) == ASHIFT
11243 && CONST_INT_P (XEXP (x, 1))
11244 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11245 {
11246 changed = true;
11247 log = INTVAL (XEXP (x, 1));
11248 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11249 GEN_INT (1 << log));
11250 }
11251
11252 if (GET_CODE (x) == PLUS)
11253 {
11254 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11255
11256 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11257 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11258 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11259 {
11260 changed = true;
11261 log = INTVAL (XEXP (XEXP (x, 0), 1));
11262 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11263 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11264 GEN_INT (1 << log));
11265 }
11266
11267 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11268 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11269 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11270 {
11271 changed = true;
11272 log = INTVAL (XEXP (XEXP (x, 1), 1));
11273 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11274 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11275 GEN_INT (1 << log));
11276 }
11277
11278 /* Put multiply first if it isn't already. */
11279 if (GET_CODE (XEXP (x, 1)) == MULT)
11280 {
11281 std::swap (XEXP (x, 0), XEXP (x, 1));
11282 changed = true;
11283 }
11284
11285 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11286 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11287 created by virtual register instantiation, register elimination, and
11288 similar optimizations. */
11289 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11290 {
11291 changed = true;
11292 x = gen_rtx_PLUS (Pmode,
11293 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11294 XEXP (XEXP (x, 1), 0)),
11295 XEXP (XEXP (x, 1), 1));
11296 }
11297
11298 /* Canonicalize
11299 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11300 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11301 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11302 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11303 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11304 && CONSTANT_P (XEXP (x, 1)))
11305 {
11306 rtx constant;
11307 rtx other = NULL_RTX;
11308
11309 if (CONST_INT_P (XEXP (x, 1)))
11310 {
11311 constant = XEXP (x, 1);
11312 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11313 }
11314 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11315 {
11316 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11317 other = XEXP (x, 1);
11318 }
11319 else
11320 constant = 0;
11321
11322 if (constant)
11323 {
11324 changed = true;
11325 x = gen_rtx_PLUS (Pmode,
11326 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11327 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11328 plus_constant (Pmode, other,
11329 INTVAL (constant)));
11330 }
11331 }
11332
11333 if (changed && ix86_legitimate_address_p (mode, x, false))
11334 return x;
11335
11336 if (GET_CODE (XEXP (x, 0)) == MULT)
11337 {
11338 changed = true;
11339 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
11340 }
11341
11342 if (GET_CODE (XEXP (x, 1)) == MULT)
11343 {
11344 changed = true;
11345 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
11346 }
11347
11348 if (changed
11349 && REG_P (XEXP (x, 1))
11350 && REG_P (XEXP (x, 0)))
11351 return x;
11352
11353 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11354 {
11355 changed = true;
11356 x = legitimize_pic_address (x, 0);
11357 }
11358
11359 if (changed && ix86_legitimate_address_p (mode, x, false))
11360 return x;
11361
11362 if (REG_P (XEXP (x, 0)))
11363 {
11364 rtx temp = gen_reg_rtx (Pmode);
11365 rtx val = force_operand (XEXP (x, 1), temp);
11366 if (val != temp)
11367 {
11368 val = convert_to_mode (Pmode, val, 1);
11369 emit_move_insn (temp, val);
11370 }
11371
11372 XEXP (x, 1) = temp;
11373 return x;
11374 }
11375
11376 else if (REG_P (XEXP (x, 1)))
11377 {
11378 rtx temp = gen_reg_rtx (Pmode);
11379 rtx val = force_operand (XEXP (x, 0), temp);
11380 if (val != temp)
11381 {
11382 val = convert_to_mode (Pmode, val, 1);
11383 emit_move_insn (temp, val);
11384 }
11385
11386 XEXP (x, 0) = temp;
11387 return x;
11388 }
11389 }
11390
11391 return x;
11392 }
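
/* For illustration: the PLUS canonicalizations above turn, e.g.,

     (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 16)))

   into

     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 16))

   which matches the index*scale + base + disp form accepted by
   ix86_legitimate_address_p, so no extra instructions are needed.  */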
11393 \f
11394 /* Print an integer constant expression in assembler syntax. Addition
11395 and subtraction are the only arithmetic that may appear in these
11396 expressions. FILE is the stdio stream to write to, X is the rtx, and
11397 CODE is the operand print code from the output string. */
11398
11399 static void
11400 output_pic_addr_const (FILE *file, rtx x, int code)
11401 {
11402 char buf[256];
11403
11404 switch (GET_CODE (x))
11405 {
11406 case PC:
11407 gcc_assert (flag_pic);
11408 putc ('.', file);
11409 break;
11410
11411 case SYMBOL_REF:
11412 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
11413 output_addr_const (file, x);
11414 else
11415 {
11416 const char *name = XSTR (x, 0);
11417
11418 /* Mark the decl as referenced so that cgraph will
11419 output the function. */
11420 if (SYMBOL_REF_DECL (x))
11421 mark_decl_referenced (SYMBOL_REF_DECL (x));
11422
11423 #if TARGET_MACHO
11424 if (MACHOPIC_INDIRECT
11425 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11426 name = machopic_indirection_name (x, /*stub_p=*/true);
11427 #endif
11428 assemble_name (file, name);
11429 }
11430 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
11431 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11432 fputs ("@PLT", file);
11433 break;
11434
11435 case LABEL_REF:
11436 x = XEXP (x, 0);
11437 /* FALLTHRU */
11438 case CODE_LABEL:
11439 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11440 assemble_name (asm_out_file, buf);
11441 break;
11442
11443 case CONST_INT:
11444 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11445 break;
11446
11447 case CONST:
11448 /* This used to output parentheses around the expression,
11449 but that does not work on the 386 (either ATT or BSD assembler). */
11450 output_pic_addr_const (file, XEXP (x, 0), code);
11451 break;
11452
11453 case CONST_DOUBLE:
11454 /* We can't handle floating point constants;
11455 TARGET_PRINT_OPERAND must handle them. */
11456 output_operand_lossage ("floating constant misused");
11457 break;
11458
11459 case PLUS:
11460 /* Some assemblers need integer constants to appear first. */
11461 if (CONST_INT_P (XEXP (x, 0)))
11462 {
11463 output_pic_addr_const (file, XEXP (x, 0), code);
11464 putc ('+', file);
11465 output_pic_addr_const (file, XEXP (x, 1), code);
11466 }
11467 else
11468 {
11469 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11470 output_pic_addr_const (file, XEXP (x, 1), code);
11471 putc ('+', file);
11472 output_pic_addr_const (file, XEXP (x, 0), code);
11473 }
11474 break;
11475
11476 case MINUS:
11477 if (!TARGET_MACHO)
11478 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11479 output_pic_addr_const (file, XEXP (x, 0), code);
11480 putc ('-', file);
11481 output_pic_addr_const (file, XEXP (x, 1), code);
11482 if (!TARGET_MACHO)
11483 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11484 break;
11485
11486 case UNSPEC:
11487 gcc_assert (XVECLEN (x, 0) == 1);
11488 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11489 switch (XINT (x, 1))
11490 {
11491 case UNSPEC_GOT:
11492 fputs ("@GOT", file);
11493 break;
11494 case UNSPEC_GOTOFF:
11495 fputs ("@GOTOFF", file);
11496 break;
11497 case UNSPEC_PLTOFF:
11498 fputs ("@PLTOFF", file);
11499 break;
11500 case UNSPEC_PCREL:
11501 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11502 "(%rip)" : "[rip]", file);
11503 break;
11504 case UNSPEC_GOTPCREL:
11505 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11506 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11507 break;
11508 case UNSPEC_GOTTPOFF:
11509 /* FIXME: This might be @TPOFF in Sun ld too. */
11510 fputs ("@gottpoff", file);
11511 break;
11512 case UNSPEC_TPOFF:
11513 fputs ("@tpoff", file);
11514 break;
11515 case UNSPEC_NTPOFF:
11516 if (TARGET_64BIT)
11517 fputs ("@tpoff", file);
11518 else
11519 fputs ("@ntpoff", file);
11520 break;
11521 case UNSPEC_DTPOFF:
11522 fputs ("@dtpoff", file);
11523 break;
11524 case UNSPEC_GOTNTPOFF:
11525 if (TARGET_64BIT)
11526 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11527 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11528 else
11529 fputs ("@gotntpoff", file);
11530 break;
11531 case UNSPEC_INDNTPOFF:
11532 fputs ("@indntpoff", file);
11533 break;
11534 #if TARGET_MACHO
11535 case UNSPEC_MACHOPIC_OFFSET:
11536 putc ('-', file);
11537 machopic_output_function_base_name (file);
11538 break;
11539 #endif
11540 default:
11541 output_operand_lossage ("invalid UNSPEC as operand");
11542 break;
11543 }
11544 break;
11545
11546 default:
11547 output_operand_lossage ("invalid expression as operand");
11548 }
11549 }
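
/* For illustration: for (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   the routine above emits "foo@GOTOFF", and for
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)) it emits
   "foo@GOTPCREL(%rip)" in AT&T syntax or "foo@GOTPCREL[rip]" in Intel
   syntax.  */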
11550
11551 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11552 We need to emit DTP-relative relocations. */
11553
11554 static void ATTRIBUTE_UNUSED
11555 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11556 {
11557 fputs (ASM_LONG, file);
11558 output_addr_const (file, x);
11559 fputs ("@dtpoff", file);
11560 switch (size)
11561 {
11562 case 4:
11563 break;
11564 case 8:
11565 fputs (", 0", file);
11566 break;
11567 default:
11568 gcc_unreachable ();
11569 }
11570 }
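
/* For illustration: for a thread-local variable "x" the routine above
   emits ASM_LONG followed by "x@dtpoff" (typically ".long x@dtpoff"),
   appending ", 0" when SIZE is 8, i.e. the offset of x from the start
   of its module's TLS block.  */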
11571
11572 /* Return true if X is a representation of the PIC register. This copes
11573 with calls from ix86_find_base_term, where the register might have
11574 been replaced by a cselib value. */
11575
11576 static bool
11577 ix86_pic_register_p (rtx x)
11578 {
11579 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11580 return (pic_offset_table_rtx
11581 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11582 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
11583 return true;
11584 else if (!REG_P (x))
11585 return false;
11586 else if (pic_offset_table_rtx)
11587 {
11588 if (REGNO (x) == REGNO (pic_offset_table_rtx))
11589 return true;
11590 if (HARD_REGISTER_P (x)
11591 && !HARD_REGISTER_P (pic_offset_table_rtx)
11592 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
11593 return true;
11594 return false;
11595 }
11596 else
11597 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11598 }
11599
11600 /* Helper function for ix86_delegitimize_address.
11601 Attempt to delegitimize TLS local-exec accesses. */
11602
11603 static rtx
11604 ix86_delegitimize_tls_address (rtx orig_x)
11605 {
11606 rtx x = orig_x, unspec;
11607 struct ix86_address addr;
11608
11609 if (!TARGET_TLS_DIRECT_SEG_REFS)
11610 return orig_x;
11611 if (MEM_P (x))
11612 x = XEXP (x, 0);
11613 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
11614 return orig_x;
11615 if (ix86_decompose_address (x, &addr) == 0
11616 || addr.seg != DEFAULT_TLS_SEG_REG
11617 || addr.disp == NULL_RTX
11618 || GET_CODE (addr.disp) != CONST)
11619 return orig_x;
11620 unspec = XEXP (addr.disp, 0);
11621 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
11622 unspec = XEXP (unspec, 0);
11623 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
11624 return orig_x;
11625 x = XVECEXP (unspec, 0, 0);
11626 gcc_assert (GET_CODE (x) == SYMBOL_REF);
11627 if (unspec != XEXP (addr.disp, 0))
11628 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
11629 if (addr.index)
11630 {
11631 rtx idx = addr.index;
11632 if (addr.scale != 1)
11633 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
11634 x = gen_rtx_PLUS (Pmode, idx, x);
11635 }
11636 if (addr.base)
11637 x = gen_rtx_PLUS (Pmode, addr.base, x);
11638 if (MEM_P (orig_x))
11639 x = replace_equiv_address_nv (orig_x, x);
11640 return x;
11641 }
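
/* For illustration: a segment-based local-exec access such as
   (mem (plus (reg) (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF))))
   using DEFAULT_TLS_SEG_REG is turned back into a MEM that references
   "x" directly (plus any base/index term), which is what the debug
   output and base-term machinery expect to see.  */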
11642
11643 /* In the name of slightly smaller debug output, and to cater to
11644 general assembler lossage, recognize PIC+GOTOFF and turn it back
11645 into a direct symbol reference.
11646
11647 On Darwin, this is necessary to avoid a crash, because Darwin
11648 has a different PIC label for each routine but the DWARF debugging
11649 information is not associated with any particular routine, so it's
11650 necessary to remove references to the PIC label from RTL stored by
11651 the DWARF output code.
11652
11653 This helper is used in the normal ix86_delegitimize_address
11654 entrypoint (e.g. used in the target delegitimization hook) and
11655 in ix86_find_base_term. As a compile-time memory optimization, we
11656 avoid allocating rtxes that will not change the outcome
11657 of the callers (find_base_value and find_base_term). */
11658
11659 static inline rtx
11660 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
11661 {
11662 rtx orig_x = delegitimize_mem_from_attrs (x);
11663 /* addend is NULL or some rtx if x is something+GOTOFF where
11664 something doesn't include the PIC register. */
11665 rtx addend = NULL_RTX;
11666 /* reg_addend is NULL or a multiple of some register. */
11667 rtx reg_addend = NULL_RTX;
11668 /* const_addend is NULL or a const_int. */
11669 rtx const_addend = NULL_RTX;
11670 /* This is the result, or NULL. */
11671 rtx result = NULL_RTX;
11672
11673 x = orig_x;
11674
11675 if (MEM_P (x))
11676 x = XEXP (x, 0);
11677
11678 if (TARGET_64BIT)
11679 {
11680 if (GET_CODE (x) == CONST
11681 && GET_CODE (XEXP (x, 0)) == PLUS
11682 && GET_MODE (XEXP (x, 0)) == Pmode
11683 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11684 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
11685 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
11686 {
11687 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
11688 base. A CONST can't be arg_pointer_rtx based. */
11689 if (base_term_p && MEM_P (orig_x))
11690 return orig_x;
11691 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
11692 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
11693 if (MEM_P (orig_x))
11694 x = replace_equiv_address_nv (orig_x, x);
11695 return x;
11696 }
11697
11698 if (GET_CODE (x) == CONST
11699 && GET_CODE (XEXP (x, 0)) == UNSPEC
11700 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
11701 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
11702 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
11703 {
11704 x = XVECEXP (XEXP (x, 0), 0, 0);
11705 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
11706 {
11707 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
11708 if (x == NULL_RTX)
11709 return orig_x;
11710 }
11711 return x;
11712 }
11713
11714 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
11715 return ix86_delegitimize_tls_address (orig_x);
11716
11717 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
11718 and -mcmodel=medium -fpic. */
11719 }
11720
11721 if (GET_CODE (x) != PLUS
11722 || GET_CODE (XEXP (x, 1)) != CONST)
11723 return ix86_delegitimize_tls_address (orig_x);
11724
11725 if (ix86_pic_register_p (XEXP (x, 0)))
11726 /* %ebx + GOT/GOTOFF */
11727 ;
11728 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11729 {
11730 /* %ebx + %reg * scale + GOT/GOTOFF */
11731 reg_addend = XEXP (x, 0);
11732 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11733 reg_addend = XEXP (reg_addend, 1);
11734 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11735 reg_addend = XEXP (reg_addend, 0);
11736 else
11737 {
11738 reg_addend = NULL_RTX;
11739 addend = XEXP (x, 0);
11740 }
11741 }
11742 else
11743 addend = XEXP (x, 0);
11744
11745 x = XEXP (XEXP (x, 1), 0);
11746 if (GET_CODE (x) == PLUS
11747 && CONST_INT_P (XEXP (x, 1)))
11748 {
11749 const_addend = XEXP (x, 1);
11750 x = XEXP (x, 0);
11751 }
11752
11753 if (GET_CODE (x) == UNSPEC
11754 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11755 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
11756 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
11757 && !MEM_P (orig_x) && !addend)))
11758 result = XVECEXP (x, 0, 0);
11759
11760 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
11761 && !MEM_P (orig_x))
11762 result = XVECEXP (x, 0, 0);
11763
11764 if (! result)
11765 return ix86_delegitimize_tls_address (orig_x);
11766
11767 /* For (PLUS something CONST_INT) both find_base_{value,term} just
11768 recurse on the first operand. */
11769 if (const_addend && !base_term_p)
11770 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11771 if (reg_addend)
11772 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11773 if (addend)
11774 {
11775 /* If the rest of original X doesn't involve the PIC register, add
11776 addend and subtract pic_offset_table_rtx. This can happen e.g.
11777 for code like:
11778 leal (%ebx, %ecx, 4), %ecx
11779 ...
11780 movl foo@GOTOFF(%ecx), %edx
11781 in which case we return (%ecx - %ebx) + foo
11782 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
11783 and reload has completed. Don't do the latter for debug,
11784 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
11785 if (pic_offset_table_rtx
11786 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
11787 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11788 pic_offset_table_rtx),
11789 result);
11790 else if (base_term_p
11791 && pic_offset_table_rtx
11792 && !TARGET_MACHO
11793 && !TARGET_VXWORKS_RTP)
11794 {
11795 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11796 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
11797 result = gen_rtx_PLUS (Pmode, tmp, result);
11798 }
11799 else
11800 return orig_x;
11801 }
11802 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11803 {
11804 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
11805 if (result == NULL_RTX)
11806 return orig_x;
11807 }
11808 return result;
11809 }
11810
11811 /* The normal instantiation of the above template. */
11812
11813 static rtx
11814 ix86_delegitimize_address (rtx x)
11815 {
11816 return ix86_delegitimize_address_1 (x, false);
11817 }
11818
11819 /* If X is a machine specific address (i.e. a symbol or label being
11820 referenced as a displacement from the GOT implemented using an
11821 UNSPEC), then return the base term. Otherwise return X. */
11822
11823 rtx
11824 ix86_find_base_term (rtx x)
11825 {
11826 rtx term;
11827
11828 if (TARGET_64BIT)
11829 {
11830 if (GET_CODE (x) != CONST)
11831 return x;
11832 term = XEXP (x, 0);
11833 if (GET_CODE (term) == PLUS
11834 && CONST_INT_P (XEXP (term, 1)))
11835 term = XEXP (term, 0);
11836 if (GET_CODE (term) != UNSPEC
11837 || (XINT (term, 1) != UNSPEC_GOTPCREL
11838 && XINT (term, 1) != UNSPEC_PCREL))
11839 return x;
11840
11841 return XVECEXP (term, 0, 0);
11842 }
11843
11844 return ix86_delegitimize_address_1 (x, true);
11845 }
11846
11847 /* Return true if X shouldn't be emitted into the debug info.
11848 Disallow UNSPECs other than @gotoff - we can't easily emit the
11849 _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we do not
11850 delegitimize it, but instead assemble it as @gotoff.
11851 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
11852 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression. */
11853
11854 static bool
11855 ix86_const_not_ok_for_debug_p (rtx x)
11856 {
11857 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
11858 return true;
11859
11860 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
11861 return true;
11862
11863 return false;
11864 }
11865 \f
11866 static void
11867 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
11868 bool fp, FILE *file)
11869 {
11870 const char *suffix;
11871
11872 if (mode == CCFPmode)
11873 {
11874 code = ix86_fp_compare_code_to_integer (code);
11875 mode = CCmode;
11876 }
11877 if (reverse)
11878 code = reverse_condition (code);
11879
11880 switch (code)
11881 {
11882 case EQ:
11883 gcc_assert (mode != CCGZmode);
11884 switch (mode)
11885 {
11886 case E_CCAmode:
11887 suffix = "a";
11888 break;
11889 case E_CCCmode:
11890 suffix = "c";
11891 break;
11892 case E_CCOmode:
11893 suffix = "o";
11894 break;
11895 case E_CCPmode:
11896 suffix = "p";
11897 break;
11898 case E_CCSmode:
11899 suffix = "s";
11900 break;
11901 default:
11902 suffix = "e";
11903 break;
11904 }
11905 break;
11906 case NE:
11907 gcc_assert (mode != CCGZmode);
11908 switch (mode)
11909 {
11910 case E_CCAmode:
11911 suffix = "na";
11912 break;
11913 case E_CCCmode:
11914 suffix = "nc";
11915 break;
11916 case E_CCOmode:
11917 suffix = "no";
11918 break;
11919 case E_CCPmode:
11920 suffix = "np";
11921 break;
11922 case E_CCSmode:
11923 suffix = "ns";
11924 break;
11925 default:
11926 suffix = "ne";
11927 break;
11928 }
11929 break;
11930 case GT:
11931 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11932 suffix = "g";
11933 break;
11934 case GTU:
11935 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11936 Those same assemblers have the same but opposite lossage on cmov. */
11937 if (mode == CCmode)
11938 suffix = fp ? "nbe" : "a";
11939 else
11940 gcc_unreachable ();
11941 break;
11942 case LT:
11943 switch (mode)
11944 {
11945 case E_CCNOmode:
11946 case E_CCGOCmode:
11947 suffix = "s";
11948 break;
11949
11950 case E_CCmode:
11951 case E_CCGCmode:
11952 case E_CCGZmode:
11953 suffix = "l";
11954 break;
11955
11956 default:
11957 gcc_unreachable ();
11958 }
11959 break;
11960 case LTU:
11961 if (mode == CCmode || mode == CCGZmode)
11962 suffix = "b";
11963 else if (mode == CCCmode)
11964 suffix = fp ? "b" : "c";
11965 else
11966 gcc_unreachable ();
11967 break;
11968 case GE:
11969 switch (mode)
11970 {
11971 case E_CCNOmode:
11972 case E_CCGOCmode:
11973 suffix = "ns";
11974 break;
11975
11976 case E_CCmode:
11977 case E_CCGCmode:
11978 case E_CCGZmode:
11979 suffix = "ge";
11980 break;
11981
11982 default:
11983 gcc_unreachable ();
11984 }
11985 break;
11986 case GEU:
11987 if (mode == CCmode || mode == CCGZmode)
11988 suffix = "nb";
11989 else if (mode == CCCmode)
11990 suffix = fp ? "nb" : "nc";
11991 else
11992 gcc_unreachable ();
11993 break;
11994 case LE:
11995 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11996 suffix = "le";
11997 break;
11998 case LEU:
11999 if (mode == CCmode)
12000 suffix = "be";
12001 else
12002 gcc_unreachable ();
12003 break;
12004 case UNORDERED:
12005 suffix = fp ? "u" : "p";
12006 break;
12007 case ORDERED:
12008 suffix = fp ? "nu" : "np";
12009 break;
12010 default:
12011 gcc_unreachable ();
12012 }
12013 fputs (suffix, file);
12014 }
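
/* For illustration (hypothetical template, not quoted from i386.md):
   with X = (gtu (reg:CC flags) (const_int 0)) and FP false,
   put_condition_code prints "a", so an output template along the lines
   of "set%C0\t%1" would produce "seta"; the 'c' variant prints the
   reversed condition and 'F'/'f' the floating-point forms.  */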
12015
12016 /* Print the name of register X to FILE based on its machine mode and number.
12017 If CODE is 'w', pretend the mode is HImode.
12018 If CODE is 'b', pretend the mode is QImode.
12019 If CODE is 'k', pretend the mode is SImode.
12020 If CODE is 'q', pretend the mode is DImode.
12021 If CODE is 'x', pretend the mode is V4SFmode.
12022 If CODE is 't', pretend the mode is V8SFmode.
12023 If CODE is 'g', pretend the mode is V16SFmode.
12024 If CODE is 'h', pretend the reg is the 'high' byte register.
12025 If CODE is 'y', print "st(0)" instead of "st" if the reg is the x87 stack top.
12026 If CODE is 'd', duplicate the operand for AVX instruction.
12027 If CODE is 'V', print naked full integer register name without %.
12028 */
12029
12030 void
12031 print_reg (rtx x, int code, FILE *file)
12032 {
12033 const char *reg;
12034 int msize;
12035 unsigned int regno;
12036 bool duplicated;
12037
12038 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12039 putc ('%', file);
12040
12041 if (x == pc_rtx)
12042 {
12043 gcc_assert (TARGET_64BIT);
12044 fputs ("rip", file);
12045 return;
12046 }
12047
12048 if (code == 'y' && STACK_TOP_P (x))
12049 {
12050 fputs ("st(0)", file);
12051 return;
12052 }
12053
12054 if (code == 'w')
12055 msize = 2;
12056 else if (code == 'b')
12057 msize = 1;
12058 else if (code == 'k')
12059 msize = 4;
12060 else if (code == 'q')
12061 msize = 8;
12062 else if (code == 'h')
12063 msize = 0;
12064 else if (code == 'x')
12065 msize = 16;
12066 else if (code == 't')
12067 msize = 32;
12068 else if (code == 'g')
12069 msize = 64;
12070 else
12071 msize = GET_MODE_SIZE (GET_MODE (x));
12072
12073 regno = REGNO (x);
12074
12075 if (regno == ARG_POINTER_REGNUM
12076 || regno == FRAME_POINTER_REGNUM
12077 || regno == FPSR_REG)
12078 {
12079 output_operand_lossage
12080 ("invalid use of register '%s'", reg_names[regno]);
12081 return;
12082 }
12083 else if (regno == FLAGS_REG)
12084 {
12085 output_operand_lossage ("invalid use of asm flag output");
12086 return;
12087 }
12088
12089 if (code == 'V')
12090 {
12091 if (GENERAL_REGNO_P (regno))
12092 msize = GET_MODE_SIZE (word_mode);
12093 else
12094 error ("%<V%> modifier on non-integer register");
12095 }
12096
12097 duplicated = code == 'd' && TARGET_AVX;
12098
12099 switch (msize)
12100 {
12101 case 16:
12102 case 12:
12103 case 8:
12104 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12105 warning (0, "unsupported size for integer register");
12106 /* FALLTHRU */
12107 case 4:
12108 if (LEGACY_INT_REGNO_P (regno))
12109 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12110 /* FALLTHRU */
12111 case 2:
12112 normal:
12113 reg = hi_reg_name[regno];
12114 break;
12115 case 1:
12116 if (regno >= ARRAY_SIZE (qi_reg_name))
12117 goto normal;
12118 if (!ANY_QI_REGNO_P (regno))
12119 error ("unsupported size for integer register");
12120 reg = qi_reg_name[regno];
12121 break;
12122 case 0:
12123 if (regno >= ARRAY_SIZE (qi_high_reg_name))
12124 goto normal;
12125 reg = qi_high_reg_name[regno];
12126 break;
12127 case 32:
12128 case 64:
12129 if (SSE_REGNO_P (regno))
12130 {
12131 gcc_assert (!duplicated);
12132 putc (msize == 32 ? 'y' : 'z', file);
12133 reg = hi_reg_name[regno] + 1;
12134 break;
12135 }
12136 goto normal;
12137 default:
12138 gcc_unreachable ();
12139 }
12140
12141 fputs (reg, file);
12142
12143 /* Irritatingly, AMD extended registers use a
12144 different naming convention: "r%d[bwd]". */
12145 if (REX_INT_REGNO_P (regno))
12146 {
12147 gcc_assert (TARGET_64BIT);
12148 switch (msize)
12149 {
12150 case 0:
12151 error ("extended registers have no high halves");
12152 break;
12153 case 1:
12154 putc ('b', file);
12155 break;
12156 case 2:
12157 putc ('w', file);
12158 break;
12159 case 4:
12160 putc ('d', file);
12161 break;
12162 case 8:
12163 /* no suffix */
12164 break;
12165 default:
12166 error ("unsupported operand size for extended register");
12167 break;
12168 }
12169 return;
12170 }
12171
12172 if (duplicated)
12173 {
12174 if (ASSEMBLER_DIALECT == ASM_ATT)
12175 fprintf (file, ", %%%s", reg);
12176 else
12177 fprintf (file, ", %s", reg);
12178 }
12179 }
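
/* For illustration: for (reg:SI 0), i.e. %eax, print_reg emits "%eax"
   with no code, "%al" with code 'b', "%ax" with code 'w' and (on a
   64-bit target) "%rax" with code 'q' in AT&T syntax; the leading '%'
   is omitted for the Intel dialect and for code 'V'.  */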
12180
12181 /* Meaning of CODE:
12182 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12183 C -- print opcode suffix for set/cmov insn.
12184 c -- like C, but print reversed condition
12185 F,f -- likewise, but for floating-point.
12186 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12187 otherwise nothing
12188 R -- print embedded rounding and sae.
12189 r -- print only sae.
12190 z -- print the opcode suffix for the size of the current operand.
12191 Z -- likewise, with special suffixes for x87 instructions.
12192 * -- print a star (in certain assembler syntax)
12193 A -- print an absolute memory reference.
12194 E -- print address with DImode register names if TARGET_64BIT.
12195 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12196 s -- print a shift double count, followed by the assembler's argument
12197 delimiter.
12198 b -- print the QImode name of the register for the indicated operand.
12199 %b0 would print %al if operands[0] is reg 0.
12200 w -- likewise, print the HImode name of the register.
12201 k -- likewise, print the SImode name of the register.
12202 q -- likewise, print the DImode name of the register.
12203 x -- likewise, print the V4SFmode name of the register.
12204 t -- likewise, print the V8SFmode name of the register.
12205 g -- likewise, print the V16SFmode name of the register.
12206 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12207 y -- print "st(0)" instead of "st" as a register.
12208 d -- print duplicated register operand for AVX instruction.
12209 D -- print condition for SSE cmp instruction.
12210 P -- if PIC, print an @PLT suffix.
12211 p -- print raw symbol name.
12212 X -- don't print any sort of PIC '@' suffix for a symbol.
12213 & -- print some in-use local-dynamic symbol name.
12214 H -- print a memory address offset by 8; used for sse high-parts
12215 Y -- print condition for XOP pcom* instruction.
12216 V -- print naked full integer register name without %.
12217 + -- print a branch hint as 'cs' or 'ds' prefix
12218 ; -- print a semicolon (after prefixes due to bug in older gas).
12219 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12220 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12221 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12222 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12223 */
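
/* For illustration (hypothetical templates, not quoted from i386.md):
   in "%!jmp\t%A0" the '!' modifier may prepend "notrack " and 'A'
   prints an absolute (indirect) operand, while "mov%z0\t{%1, %0|%0, %1}"
   uses 'z' to pick the b/w/l/q suffix from the mode of operands[0] in
   AT&T syntax.  */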
12224
12225 void
12226 ix86_print_operand (FILE *file, rtx x, int code)
12227 {
12228 if (code)
12229 {
12230 switch (code)
12231 {
12232 case 'A':
12233 switch (ASSEMBLER_DIALECT)
12234 {
12235 case ASM_ATT:
12236 putc ('*', file);
12237 break;
12238
12239 case ASM_INTEL:
12240 /* Intel syntax. For absolute addresses, registers should not
12241 be surrounded by brackets. */
12242 if (!REG_P (x))
12243 {
12244 putc ('[', file);
12245 ix86_print_operand (file, x, 0);
12246 putc (']', file);
12247 return;
12248 }
12249 break;
12250
12251 default:
12252 gcc_unreachable ();
12253 }
12254
12255 ix86_print_operand (file, x, 0);
12256 return;
12257
12258 case 'E':
12259 /* Wrap address in an UNSPEC to declare special handling. */
12260 if (TARGET_64BIT)
12261 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
12262
12263 output_address (VOIDmode, x);
12264 return;
12265
12266 case 'L':
12267 if (ASSEMBLER_DIALECT == ASM_ATT)
12268 putc ('l', file);
12269 return;
12270
12271 case 'W':
12272 if (ASSEMBLER_DIALECT == ASM_ATT)
12273 putc ('w', file);
12274 return;
12275
12276 case 'B':
12277 if (ASSEMBLER_DIALECT == ASM_ATT)
12278 putc ('b', file);
12279 return;
12280
12281 case 'Q':
12282 if (ASSEMBLER_DIALECT == ASM_ATT)
12283 putc ('l', file);
12284 return;
12285
12286 case 'S':
12287 if (ASSEMBLER_DIALECT == ASM_ATT)
12288 putc ('s', file);
12289 return;
12290
12291 case 'T':
12292 if (ASSEMBLER_DIALECT == ASM_ATT)
12293 putc ('t', file);
12294 return;
12295
12296 case 'O':
12297 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12298 if (ASSEMBLER_DIALECT != ASM_ATT)
12299 return;
12300
12301 switch (GET_MODE_SIZE (GET_MODE (x)))
12302 {
12303 case 2:
12304 putc ('w', file);
12305 break;
12306
12307 case 4:
12308 putc ('l', file);
12309 break;
12310
12311 case 8:
12312 putc ('q', file);
12313 break;
12314
12315 default:
12316 output_operand_lossage ("invalid operand size for operand "
12317 "code 'O'");
12318 return;
12319 }
12320
12321 putc ('.', file);
12322 #endif
12323 return;
12324
12325 case 'z':
12326 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12327 {
12328 /* Opcodes don't get size suffixes if using Intel syntax. */
12329 if (ASSEMBLER_DIALECT == ASM_INTEL)
12330 return;
12331
12332 switch (GET_MODE_SIZE (GET_MODE (x)))
12333 {
12334 case 1:
12335 putc ('b', file);
12336 return;
12337
12338 case 2:
12339 putc ('w', file);
12340 return;
12341
12342 case 4:
12343 putc ('l', file);
12344 return;
12345
12346 case 8:
12347 putc ('q', file);
12348 return;
12349
12350 default:
12351 output_operand_lossage ("invalid operand size for operand "
12352 "code 'z'");
12353 return;
12354 }
12355 }
12356
12357 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12358 warning (0, "non-integer operand used with operand code %<z%>");
12359 /* FALLTHRU */
12360
12361 case 'Z':
12362 /* 387 opcodes don't get size suffixes if using Intel syntax. */
12363 if (ASSEMBLER_DIALECT == ASM_INTEL)
12364 return;
12365
12366 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12367 {
12368 switch (GET_MODE_SIZE (GET_MODE (x)))
12369 {
12370 case 2:
12371 #ifdef HAVE_AS_IX86_FILDS
12372 putc ('s', file);
12373 #endif
12374 return;
12375
12376 case 4:
12377 putc ('l', file);
12378 return;
12379
12380 case 8:
12381 #ifdef HAVE_AS_IX86_FILDQ
12382 putc ('q', file);
12383 #else
12384 fputs ("ll", file);
12385 #endif
12386 return;
12387
12388 default:
12389 break;
12390 }
12391 }
12392 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12393 {
12394 /* 387 opcodes don't get size suffixes
12395 if the operands are registers. */
12396 if (STACK_REG_P (x))
12397 return;
12398
12399 switch (GET_MODE_SIZE (GET_MODE (x)))
12400 {
12401 case 4:
12402 putc ('s', file);
12403 return;
12404
12405 case 8:
12406 putc ('l', file);
12407 return;
12408
12409 case 12:
12410 case 16:
12411 putc ('t', file);
12412 return;
12413
12414 default:
12415 break;
12416 }
12417 }
12418 else
12419 {
12420 output_operand_lossage ("invalid operand type used with "
12421 "operand code 'Z'");
12422 return;
12423 }
12424
12425 output_operand_lossage ("invalid operand size for operand code 'Z'");
12426 return;
12427
12428 case 'd':
12429 case 'b':
12430 case 'w':
12431 case 'k':
12432 case 'q':
12433 case 'h':
12434 case 't':
12435 case 'g':
12436 case 'y':
12437 case 'x':
12438 case 'X':
12439 case 'P':
12440 case 'p':
12441 case 'V':
12442 break;
12443
12444 case 's':
12445 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12446 {
12447 ix86_print_operand (file, x, 0);
12448 fputs (", ", file);
12449 }
12450 return;
12451
12452 case 'Y':
12453 switch (GET_CODE (x))
12454 {
12455 case NE:
12456 fputs ("neq", file);
12457 break;
12458 case EQ:
12459 fputs ("eq", file);
12460 break;
12461 case GE:
12462 case GEU:
12463 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12464 break;
12465 case GT:
12466 case GTU:
12467 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12468 break;
12469 case LE:
12470 case LEU:
12471 fputs ("le", file);
12472 break;
12473 case LT:
12474 case LTU:
12475 fputs ("lt", file);
12476 break;
12477 case UNORDERED:
12478 fputs ("unord", file);
12479 break;
12480 case ORDERED:
12481 fputs ("ord", file);
12482 break;
12483 case UNEQ:
12484 fputs ("ueq", file);
12485 break;
12486 case UNGE:
12487 fputs ("nlt", file);
12488 break;
12489 case UNGT:
12490 fputs ("nle", file);
12491 break;
12492 case UNLE:
12493 fputs ("ule", file);
12494 break;
12495 case UNLT:
12496 fputs ("ult", file);
12497 break;
12498 case LTGT:
12499 fputs ("une", file);
12500 break;
12501 default:
12502 output_operand_lossage ("operand is not a condition code, "
12503 "invalid operand code 'Y'");
12504 return;
12505 }
12506 return;
12507
12508 case 'D':
12509 /* Little bit of braindamage here. The SSE compare instructions
12510 use completely different names for the comparisons than the
12511 fp conditional moves do. */
12512 switch (GET_CODE (x))
12513 {
12514 case UNEQ:
12515 if (TARGET_AVX)
12516 {
12517 fputs ("eq_us", file);
12518 break;
12519 }
12520 /* FALLTHRU */
12521 case EQ:
12522 fputs ("eq", file);
12523 break;
12524 case UNLT:
12525 if (TARGET_AVX)
12526 {
12527 fputs ("nge", file);
12528 break;
12529 }
12530 /* FALLTHRU */
12531 case LT:
12532 fputs ("lt", file);
12533 break;
12534 case UNLE:
12535 if (TARGET_AVX)
12536 {
12537 fputs ("ngt", file);
12538 break;
12539 }
12540 /* FALLTHRU */
12541 case LE:
12542 fputs ("le", file);
12543 break;
12544 case UNORDERED:
12545 fputs ("unord", file);
12546 break;
12547 case LTGT:
12548 if (TARGET_AVX)
12549 {
12550 fputs ("neq_oq", file);
12551 break;
12552 }
12553 /* FALLTHRU */
12554 case NE:
12555 fputs ("neq", file);
12556 break;
12557 case GE:
12558 if (TARGET_AVX)
12559 {
12560 fputs ("ge", file);
12561 break;
12562 }
12563 /* FALLTHRU */
12564 case UNGE:
12565 fputs ("nlt", file);
12566 break;
12567 case GT:
12568 if (TARGET_AVX)
12569 {
12570 fputs ("gt", file);
12571 break;
12572 }
12573 /* FALLTHRU */
12574 case UNGT:
12575 fputs ("nle", file);
12576 break;
12577 case ORDERED:
12578 fputs ("ord", file);
12579 break;
12580 default:
12581 output_operand_lossage ("operand is not a condition code, "
12582 "invalid operand code 'D'");
12583 return;
12584 }
12585 return;
12586
12587 case 'F':
12588 case 'f':
12589 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12590 if (ASSEMBLER_DIALECT == ASM_ATT)
12591 putc ('.', file);
12592 gcc_fallthrough ();
12593 #endif
12594
12595 case 'C':
12596 case 'c':
12597 if (!COMPARISON_P (x))
12598 {
12599 output_operand_lossage ("operand is not a condition code, "
12600 "invalid operand code '%c'", code);
12601 return;
12602 }
12603 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
12604 code == 'c' || code == 'f',
12605 code == 'F' || code == 'f',
12606 file);
12607 return;
12608
12609 case 'H':
12610 if (!offsettable_memref_p (x))
12611 {
12612 output_operand_lossage ("operand is not an offsettable memory "
12613 "reference, invalid operand code 'H'");
12614 return;
12615 }
12616 /* It doesn't actually matter what mode we use here, as we're
12617 only going to use this for printing. */
12618 x = adjust_address_nv (x, DImode, 8);
12619 /* Output 'qword ptr' for intel assembler dialect. */
12620 if (ASSEMBLER_DIALECT == ASM_INTEL)
12621 code = 'q';
12622 break;
12623
12624 case 'K':
12625 if (!CONST_INT_P (x))
12626 {
12627 output_operand_lossage ("operand is not an integer, invalid "
12628 "operand code 'K'");
12629 return;
12630 }
12631
12632 if (INTVAL (x) & IX86_HLE_ACQUIRE)
12633 #ifdef HAVE_AS_IX86_HLE
12634 fputs ("xacquire ", file);
12635 #else
12636 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
12637 #endif
12638 else if (INTVAL (x) & IX86_HLE_RELEASE)
12639 #ifdef HAVE_AS_IX86_HLE
12640 fputs ("xrelease ", file);
12641 #else
12642 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
12643 #endif
12644 /* We do not want to print value of the operand. */
12645 return;
12646
12647 case 'N':
12648 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
12649 fputs ("{z}", file);
12650 return;
12651
12652 case 'r':
12653 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
12654 {
12655 output_operand_lossage ("operand is not a specific integer, "
12656 "invalid operand code 'r'");
12657 return;
12658 }
12659
12660 if (ASSEMBLER_DIALECT == ASM_INTEL)
12661 fputs (", ", file);
12662
12663 fputs ("{sae}", file);
12664
12665 if (ASSEMBLER_DIALECT == ASM_ATT)
12666 fputs (", ", file);
12667
12668 return;
12669
12670 case 'R':
12671 if (!CONST_INT_P (x))
12672 {
12673 output_operand_lossage ("operand is not an integer, invalid "
12674 "operand code 'R'");
12675 return;
12676 }
12677
12678 if (ASSEMBLER_DIALECT == ASM_INTEL)
12679 fputs (", ", file);
12680
12681 switch (INTVAL (x))
12682 {
12683 case ROUND_NEAREST_INT | ROUND_SAE:
12684 fputs ("{rn-sae}", file);
12685 break;
12686 case ROUND_NEG_INF | ROUND_SAE:
12687 fputs ("{rd-sae}", file);
12688 break;
12689 case ROUND_POS_INF | ROUND_SAE:
12690 fputs ("{ru-sae}", file);
12691 break;
12692 case ROUND_ZERO | ROUND_SAE:
12693 fputs ("{rz-sae}", file);
12694 break;
12695 default:
12696 output_operand_lossage ("operand is not a specific integer, "
12697 "invalid operand code 'R'");
12698 }
12699
12700 if (ASSEMBLER_DIALECT == ASM_ATT)
12701 fputs (", ", file);
12702
12703 return;
12704
12705 case '*':
12706 if (ASSEMBLER_DIALECT == ASM_ATT)
12707 putc ('*', file);
12708 return;
12709
12710 case '&':
12711 {
12712 const char *name = get_some_local_dynamic_name ();
12713 if (name == NULL)
12714 output_operand_lossage ("'%%&' used without any "
12715 "local dynamic TLS references");
12716 else
12717 assemble_name (file, name);
12718 return;
12719 }
12720
12721 case '+':
12722 {
12723 rtx x;
12724
12725 if (!optimize
12726 || optimize_function_for_size_p (cfun)
12727 || !TARGET_BRANCH_PREDICTION_HINTS)
12728 return;
12729
12730 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12731 if (x)
12732 {
12733 int pred_val = profile_probability::from_reg_br_prob_note
12734 (XINT (x, 0)).to_reg_br_prob_base ();
12735
12736 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12737 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12738 {
12739 bool taken = pred_val > REG_BR_PROB_BASE / 2;
12740 bool cputaken
12741 = final_forward_branch_p (current_output_insn) == 0;
12742
12743 /* Emit hints only in the case where the default branch
12744 prediction heuristics would fail. */
12745 if (taken != cputaken)
12746 {
12747 /* We use 3e (DS) prefix for taken branches and
12748 2e (CS) prefix for not taken branches. */
12749 if (taken)
12750 fputs ("ds ; ", file);
12751 else
12752 fputs ("cs ; ", file);
12753 }
12754 }
12755 }
12756 return;
12757 }
12758
12759 case ';':
12760 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12761 putc (';', file);
12762 #endif
12763 return;
12764
12765 case '~':
12766 putc (TARGET_AVX2 ? 'i' : 'f', file);
12767 return;
12768
12769 case 'M':
12770 if (TARGET_X32)
12771 {
12772 /* NB: 32-bit indices in a VSIB address are sign-extended
12773 to 64 bits. In x32, the 32-bit address 0xf7fa3010 is
12774 sign-extended to 0xfffffffff7fa3010, which is an invalid
12775 address. Add the addr32 prefix if there is neither a base
12776 register nor a symbol. */
12777 bool ok;
12778 struct ix86_address parts;
12779 ok = ix86_decompose_address (x, &parts);
12780 gcc_assert (ok && parts.index == NULL_RTX);
12781 if (parts.base == NULL_RTX
12782 && (parts.disp == NULL_RTX
12783 || !symbolic_operand (parts.disp,
12784 GET_MODE (parts.disp))))
12785 fputs ("addr32 ", file);
12786 }
12787 return;
12788
12789 case '^':
12790 if (TARGET_64BIT && Pmode != word_mode)
12791 fputs ("addr32 ", file);
12792 return;
12793
12794 case '!':
12795 if (ix86_notrack_prefixed_insn_p (current_output_insn))
12796 fputs ("notrack ", file);
12797 return;
12798
12799 default:
12800 output_operand_lossage ("invalid operand code '%c'", code);
12801 }
12802 }
12803
12804 if (REG_P (x))
12805 print_reg (x, code, file);
12806
12807 else if (MEM_P (x))
12808 {
12809 rtx addr = XEXP (x, 0);
12810
12811 /* No `byte ptr' prefix for call instructions ... */
12812 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
12813 {
12814 machine_mode mode = GET_MODE (x);
12815 const char *size;
12816
12817 /* Check for explicit size override codes. */
12818 if (code == 'b')
12819 size = "BYTE";
12820 else if (code == 'w')
12821 size = "WORD";
12822 else if (code == 'k')
12823 size = "DWORD";
12824 else if (code == 'q')
12825 size = "QWORD";
12826 else if (code == 'x')
12827 size = "XMMWORD";
12828 else if (code == 't')
12829 size = "YMMWORD";
12830 else if (code == 'g')
12831 size = "ZMMWORD";
12832 else if (mode == BLKmode)
12833 /* ... or BLKmode operands, when not overridden. */
12834 size = NULL;
12835 else
12836 switch (GET_MODE_SIZE (mode))
12837 {
12838 case 1: size = "BYTE"; break;
12839 case 2: size = "WORD"; break;
12840 case 4: size = "DWORD"; break;
12841 case 8: size = "QWORD"; break;
12842 case 12: size = "TBYTE"; break;
12843 case 16:
12844 if (mode == XFmode)
12845 size = "TBYTE";
12846 else
12847 size = "XMMWORD";
12848 break;
12849 case 32: size = "YMMWORD"; break;
12850 case 64: size = "ZMMWORD"; break;
12851 default:
12852 gcc_unreachable ();
12853 }
12854 if (size)
12855 {
12856 fputs (size, file);
12857 fputs (" PTR ", file);
12858 }
12859 }
12860
12861 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
12862 output_operand_lossage ("invalid constraints for operand");
12863 else
12864 ix86_print_operand_address_as
12865 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
12866 }
12867
12868 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
12869 {
12870 long l;
12871
12872 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
12873
12874 if (ASSEMBLER_DIALECT == ASM_ATT)
12875 putc ('$', file);
12876 /* Sign extend 32bit SFmode immediate to 8 bytes. */
12877 if (code == 'q')
12878 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
12879 (unsigned long long) (int) l);
12880 else
12881 fprintf (file, "0x%08x", (unsigned int) l);
12882 }
12883
12884 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
12885 {
12886 long l[2];
12887
12888 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
12889
12890 if (ASSEMBLER_DIALECT == ASM_ATT)
12891 putc ('$', file);
12892 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
12893 }
12894
12895 /* These float cases don't actually occur as immediate operands. */
12896 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
12897 {
12898 char dstr[30];
12899
12900 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12901 fputs (dstr, file);
12902 }
12903
12904 else
12905 {
12906 /* We have patterns that allow zero sets of memory, for instance.
12907 In 64-bit mode, we should probably support all 8-byte vectors,
12908 since we can in fact encode that into an immediate. */
12909 if (GET_CODE (x) == CONST_VECTOR)
12910 {
12911 if (x != CONST0_RTX (GET_MODE (x)))
12912 output_operand_lossage ("invalid vector immediate");
12913 x = const0_rtx;
12914 }
12915
12916 if (code != 'P' && code != 'p')
12917 {
12918 if (CONST_INT_P (x))
12919 {
12920 if (ASSEMBLER_DIALECT == ASM_ATT)
12921 putc ('$', file);
12922 }
12923 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12924 || GET_CODE (x) == LABEL_REF)
12925 {
12926 if (ASSEMBLER_DIALECT == ASM_ATT)
12927 putc ('$', file);
12928 else
12929 fputs ("OFFSET FLAT:", file);
12930 }
12931 }
12932 if (CONST_INT_P (x))
12933 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12934 else if (flag_pic || MACHOPIC_INDIRECT)
12935 output_pic_addr_const (file, x, code);
12936 else
12937 output_addr_const (file, x);
12938 }
12939 }
12940
12941 static bool
12942 ix86_print_operand_punct_valid_p (unsigned char code)
12943 {
12944 return (code == '*' || code == '+' || code == '&' || code == ';'
12945 || code == '~' || code == '^' || code == '!');
12946 }
12947 \f
12948 /* Print a memory operand whose address is ADDR. */
12949
12950 static void
12951 ix86_print_operand_address_as (FILE *file, rtx addr,
12952 addr_space_t as, bool no_rip)
12953 {
12954 struct ix86_address parts;
12955 rtx base, index, disp;
12956 int scale;
12957 int ok;
12958 bool vsib = false;
12959 int code = 0;
12960
12961 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
12962 {
12963 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
12964 gcc_assert (parts.index == NULL_RTX);
12965 parts.index = XVECEXP (addr, 0, 1);
12966 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
12967 addr = XVECEXP (addr, 0, 0);
12968 vsib = true;
12969 }
12970 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
12971 {
12972 gcc_assert (TARGET_64BIT);
12973 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
12974 code = 'q';
12975 }
12976 else
12977 ok = ix86_decompose_address (addr, &parts);
12978
12979 gcc_assert (ok);
12980
12981 base = parts.base;
12982 index = parts.index;
12983 disp = parts.disp;
12984 scale = parts.scale;
12985
12986 if (ADDR_SPACE_GENERIC_P (as))
12987 as = parts.seg;
12988 else
12989 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
12990
12991 if (!ADDR_SPACE_GENERIC_P (as))
12992 {
12993 if (ASSEMBLER_DIALECT == ASM_ATT)
12994 putc ('%', file);
12995
12996 switch (as)
12997 {
12998 case ADDR_SPACE_SEG_FS:
12999 fputs ("fs:", file);
13000 break;
13001 case ADDR_SPACE_SEG_GS:
13002 fputs ("gs:", file);
13003 break;
13004 default:
13005 gcc_unreachable ();
13006 }
13007 }
13008
13009 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13010 if (TARGET_64BIT && !base && !index && !no_rip)
13011 {
13012 rtx symbol = disp;
13013
13014 if (GET_CODE (disp) == CONST
13015 && GET_CODE (XEXP (disp, 0)) == PLUS
13016 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13017 symbol = XEXP (XEXP (disp, 0), 0);
13018
13019 if (GET_CODE (symbol) == LABEL_REF
13020 || (GET_CODE (symbol) == SYMBOL_REF
13021 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13022 base = pc_rtx;
13023 }
13024
13025 if (!base && !index)
13026 {
13027 /* Displacement only requires special attention. */
13028 if (CONST_INT_P (disp))
13029 {
13030 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13031 fputs ("ds:", file);
13032 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13033 }
13034 /* Load the external function address via the GOT slot to avoid PLT. */
13035 else if (GET_CODE (disp) == CONST
13036 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13037 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13038 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13039 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13040 output_pic_addr_const (file, disp, 0);
13041 else if (flag_pic)
13042 output_pic_addr_const (file, disp, 0);
13043 else
13044 output_addr_const (file, disp);
13045 }
13046 else
13047 {
13048 /* Print SImode register names to force addr32 prefix. */
13049 if (SImode_address_operand (addr, VOIDmode))
13050 {
13051 if (flag_checking)
13052 {
13053 gcc_assert (TARGET_64BIT);
13054 switch (GET_CODE (addr))
13055 {
13056 case SUBREG:
13057 gcc_assert (GET_MODE (addr) == SImode);
13058 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13059 break;
13060 case ZERO_EXTEND:
13061 case AND:
13062 gcc_assert (GET_MODE (addr) == DImode);
13063 break;
13064 default:
13065 gcc_unreachable ();
13066 }
13067 }
13068 gcc_assert (!code);
13069 code = 'k';
13070 }
13071 else if (code == 0
13072 && TARGET_X32
13073 && disp
13074 && CONST_INT_P (disp)
13075 && INTVAL (disp) < -16*1024*1024)
13076 {
13077 /* X32 runs in 64-bit mode, where displacement, DISP, in
13078 address DISP(%r64), is encoded as 32-bit immediate sign-
13079 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13080 address is %r64 + 0xffffffffbffffd00. When %r64 <
13081 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13082 which is invalid for x32. The correct address is %r64
13083 - 0x40000300 == 0xf7ffdd64. To properly encode
13084 -0x40000300(%r64) for x32, we zero-extend negative
13085 displacement by forcing addr32 prefix which truncates
13086 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13087 zero-extend all negative displacements, including -1(%rsp).
13088 However, for small negative displacements, sign-extension
13089 won't cause overflow. We only zero-extend negative
13090 displacements if they are < -16*1024*1024, which is also used
13091 to check legitimate address displacements for PIC. */
13092 code = 'k';
13093 }
13094
13095 /* Since the upper 32 bits of RSP are always zero for x32,
13096 we can encode %esp as %rsp to avoid 0x67 prefix if
13097 there is no index register. */
13098 if (TARGET_X32 && Pmode == SImode
13099 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13100 code = 'q';
13101
13102 if (ASSEMBLER_DIALECT == ASM_ATT)
13103 {
13104 if (disp)
13105 {
13106 if (flag_pic)
13107 output_pic_addr_const (file, disp, 0);
13108 else if (GET_CODE (disp) == LABEL_REF)
13109 output_asm_label (disp);
13110 else
13111 output_addr_const (file, disp);
13112 }
13113
13114 putc ('(', file);
13115 if (base)
13116 print_reg (base, code, file);
13117 if (index)
13118 {
13119 putc (',', file);
13120 print_reg (index, vsib ? 0 : code, file);
13121 if (scale != 1 || vsib)
13122 fprintf (file, ",%d", scale);
13123 }
13124 putc (')', file);
13125 }
13126 else
13127 {
13128 rtx offset = NULL_RTX;
13129
13130 if (disp)
13131 {
13132 /* Pull out the offset of a symbol; print any symbol itself. */
13133 if (GET_CODE (disp) == CONST
13134 && GET_CODE (XEXP (disp, 0)) == PLUS
13135 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13136 {
13137 offset = XEXP (XEXP (disp, 0), 1);
13138 disp = gen_rtx_CONST (VOIDmode,
13139 XEXP (XEXP (disp, 0), 0));
13140 }
13141
13142 if (flag_pic)
13143 output_pic_addr_const (file, disp, 0);
13144 else if (GET_CODE (disp) == LABEL_REF)
13145 output_asm_label (disp);
13146 else if (CONST_INT_P (disp))
13147 offset = disp;
13148 else
13149 output_addr_const (file, disp);
13150 }
13151
13152 putc ('[', file);
13153 if (base)
13154 {
13155 print_reg (base, code, file);
13156 if (offset)
13157 {
13158 if (INTVAL (offset) >= 0)
13159 putc ('+', file);
13160 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13161 }
13162 }
13163 else if (offset)
13164 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13165 else
13166 putc ('0', file);
13167
13168 if (index)
13169 {
13170 putc ('+', file);
13171 print_reg (index, vsib ? 0 : code, file);
13172 if (scale != 1 || vsib)
13173 fprintf (file, "*%d", scale);
13174 }
13175 putc (']', file);
13176 }
13177 }
13178 }
13179
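/* A short worked example of the address output above (illustrative
   only): a base+index+displacement address with scale 4 is printed as
   "-8(%rbp,%rax,4)" in AT&T syntax and as "[rbp-8+rax*4]" in Intel
   syntax, while a RIP-relative reference to a local symbol "bar" (a
   placeholder) comes out as "bar(%rip)" once BASE has been set to
   pc_rtx above.  */
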
13180 static void
13181 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
13182 {
13183 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
13184 }
13185
13186 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13187
13188 static bool
13189 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13190 {
13191 rtx op;
13192
13193 if (GET_CODE (x) != UNSPEC)
13194 return false;
13195
13196 op = XVECEXP (x, 0, 0);
13197 switch (XINT (x, 1))
13198 {
13199 case UNSPEC_GOTOFF:
13200 output_addr_const (file, op);
13201 fputs ("@gotoff", file);
13202 break;
13203 case UNSPEC_GOTTPOFF:
13204 output_addr_const (file, op);
13205 /* FIXME: This might be @TPOFF in Sun ld. */
13206 fputs ("@gottpoff", file);
13207 break;
13208 case UNSPEC_TPOFF:
13209 output_addr_const (file, op);
13210 fputs ("@tpoff", file);
13211 break;
13212 case UNSPEC_NTPOFF:
13213 output_addr_const (file, op);
13214 if (TARGET_64BIT)
13215 fputs ("@tpoff", file);
13216 else
13217 fputs ("@ntpoff", file);
13218 break;
13219 case UNSPEC_DTPOFF:
13220 output_addr_const (file, op);
13221 fputs ("@dtpoff", file);
13222 break;
13223 case UNSPEC_GOTNTPOFF:
13224 output_addr_const (file, op);
13225 if (TARGET_64BIT)
13226 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13227 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13228 else
13229 fputs ("@gotntpoff", file);
13230 break;
13231 case UNSPEC_INDNTPOFF:
13232 output_addr_const (file, op);
13233 fputs ("@indntpoff", file);
13234 break;
13235 #if TARGET_MACHO
13236 case UNSPEC_MACHOPIC_OFFSET:
13237 output_addr_const (file, op);
13238 putc ('-', file);
13239 machopic_output_function_base_name (file);
13240 break;
13241 #endif
13242
13243 default:
13244 return false;
13245 }
13246
13247 return true;
13248 }
13249 \f
13250 \f
13251 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13252 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13253 is the expression of the binary operation. The output may either be
13254 emitted here, or returned to the caller, like all output_* functions.
13255
13256 There is no guarantee that the operands are the same mode, as they
13257 might be within FLOAT or FLOAT_EXTEND expressions. */
13258
13259 #ifndef SYSV386_COMPAT
13260 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13261 wants to fix the assemblers because that causes incompatibility
13262 with gcc. No-one wants to fix gcc because that causes
13263 incompatibility with assemblers... You can use the option of
13264 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13265 #define SYSV386_COMPAT 1
13266 #endif
13267
13268 const char *
13269 output_387_binary_op (rtx_insn *insn, rtx *operands)
13270 {
13271 static char buf[40];
13272 const char *p;
13273 bool is_sse
13274 = (SSE_REG_P (operands[0])
13275 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
13276
13277 if (is_sse)
13278 p = "%v";
13279 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13280 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13281 p = "fi";
13282 else
13283 p = "f";
13284
13285 strcpy (buf, p);
13286
13287 switch (GET_CODE (operands[3]))
13288 {
13289 case PLUS:
13290 p = "add"; break;
13291 case MINUS:
13292 p = "sub"; break;
13293 case MULT:
13294 p = "mul"; break;
13295 case DIV:
13296 p = "div"; break;
13297 default:
13298 gcc_unreachable ();
13299 }
13300
13301 strcat (buf, p);
13302
13303 if (is_sse)
13304 {
13305 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
13306 strcat (buf, p);
13307
13308 if (TARGET_AVX)
13309 p = "\t{%2, %1, %0|%0, %1, %2}";
13310 else
13311 p = "\t{%2, %0|%0, %2}";
13312
13313 strcat (buf, p);
13314 return buf;
13315 }
13316
13317 /* Even if we do not want to check the inputs, this documents the input
13318 constraints, which helps in understanding the following code. */
13319 if (flag_checking)
13320 {
13321 if (STACK_REG_P (operands[0])
13322 && ((REG_P (operands[1])
13323 && REGNO (operands[0]) == REGNO (operands[1])
13324 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13325 || (REG_P (operands[2])
13326 && REGNO (operands[0]) == REGNO (operands[2])
13327 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13328 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13329 ; /* ok */
13330 else
13331 gcc_unreachable ();
13332 }
13333
13334 switch (GET_CODE (operands[3]))
13335 {
13336 case MULT:
13337 case PLUS:
13338 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13339 std::swap (operands[1], operands[2]);
13340
13341 /* We know operands[0] == operands[1]. */
13342
13343 if (MEM_P (operands[2]))
13344 {
13345 p = "%Z2\t%2";
13346 break;
13347 }
13348
13349 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13350 {
13351 if (STACK_TOP_P (operands[0]))
13352 /* How is it that we are storing to a dead operand[2]?
13353 Well, presumably operands[1] is dead too. We can't
13354 store the result to st(0) as st(0) gets popped on this
13355 instruction. Instead store to operands[2] (which I
13356 think has to be st(1)). st(1) will be popped later.
13357 gcc <= 2.8.1 didn't have this check and generated
13358 assembly code that the Unixware assembler rejected. */
13359 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13360 else
13361 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13362 break;
13363 }
13364
13365 if (STACK_TOP_P (operands[0]))
13366 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13367 else
13368 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13369 break;
13370
13371 case MINUS:
13372 case DIV:
13373 if (MEM_P (operands[1]))
13374 {
13375 p = "r%Z1\t%1";
13376 break;
13377 }
13378
13379 if (MEM_P (operands[2]))
13380 {
13381 p = "%Z2\t%2";
13382 break;
13383 }
13384
13385 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13386 {
13387 #if SYSV386_COMPAT
13388 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13389 derived assemblers, confusingly reverse the direction of
13390 the operation for fsub{r} and fdiv{r} when the
13391 destination register is not st(0). The Intel assembler
13392 doesn't have this brain damage. Read !SYSV386_COMPAT to
13393 figure out what the hardware really does. */
13394 if (STACK_TOP_P (operands[0]))
13395 p = "{p\t%0, %2|rp\t%2, %0}";
13396 else
13397 p = "{rp\t%2, %0|p\t%0, %2}";
13398 #else
13399 if (STACK_TOP_P (operands[0]))
13400 /* As above for fmul/fadd, we can't store to st(0). */
13401 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13402 else
13403 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13404 #endif
13405 break;
13406 }
13407
13408 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13409 {
13410 #if SYSV386_COMPAT
13411 if (STACK_TOP_P (operands[0]))
13412 p = "{rp\t%0, %1|p\t%1, %0}";
13413 else
13414 p = "{p\t%1, %0|rp\t%0, %1}";
13415 #else
13416 if (STACK_TOP_P (operands[0]))
13417 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13418 else
13419 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13420 #endif
13421 break;
13422 }
13423
13424 if (STACK_TOP_P (operands[0]))
13425 {
13426 if (STACK_TOP_P (operands[1]))
13427 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13428 else
13429 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13430 break;
13431 }
13432 else if (STACK_TOP_P (operands[1]))
13433 {
13434 #if SYSV386_COMPAT
13435 p = "{\t%1, %0|r\t%0, %1}";
13436 #else
13437 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13438 #endif
13439 }
13440 else
13441 {
13442 #if SYSV386_COMPAT
13443 p = "{r\t%2, %0|\t%0, %2}";
13444 #else
13445 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13446 #endif
13447 }
13448 break;
13449
13450 default:
13451 gcc_unreachable ();
13452 }
13453
13454 strcat (buf, p);
13455 return buf;
13456 }
13457
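/* Two illustrative results of the template construction above: for an
   AVX SFmode addition BUF ends up as "%vaddss\t{%2, %1, %0|%0, %1, %2}",
   where the "{att|intel}" braces pick the dialect at output time, and
   for an x87 addition whose operands[2] is in memory it ends up as
   "fadd%Z2\t%2".  (A sketch of two common cases only.)  */
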
13458 /* Return needed mode for entity in optimize_mode_switching pass. */
13459
13460 static int
13461 ix86_dirflag_mode_needed (rtx_insn *insn)
13462 {
13463 if (CALL_P (insn))
13464 {
13465 if (cfun->machine->func_type == TYPE_NORMAL)
13466 return X86_DIRFLAG_ANY;
13467 else
13468 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
13469 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
13470 }
13471
13472 if (recog_memoized (insn) < 0)
13473 return X86_DIRFLAG_ANY;
13474
13475 if (get_attr_type (insn) == TYPE_STR)
13476 {
13477 /* Emit cld instruction if stringops are used in the function. */
13478 if (cfun->machine->func_type == TYPE_NORMAL)
13479 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
13480 else
13481 return X86_DIRFLAG_RESET;
13482 }
13483
13484 return X86_DIRFLAG_ANY;
13485 }
13486
13487 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP. */
13488
13489 static bool
13490 ix86_check_avx_upper_register (const_rtx exp)
13491 {
13492 return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128;
13493 }
13494
13495 /* Return needed mode for entity in optimize_mode_switching pass. */
13496
13497 static int
13498 ix86_avx_u128_mode_needed (rtx_insn *insn)
13499 {
13500 if (CALL_P (insn))
13501 {
13502 rtx link;
13503
13504 /* Needed mode is set to AVX_U128_CLEAN if there are
13505 no 256bit or 512bit modes used in function arguments. */
13506 for (link = CALL_INSN_FUNCTION_USAGE (insn);
13507 link;
13508 link = XEXP (link, 1))
13509 {
13510 if (GET_CODE (XEXP (link, 0)) == USE)
13511 {
13512 rtx arg = XEXP (XEXP (link, 0), 0);
13513
13514 if (ix86_check_avx_upper_register (arg))
13515 return AVX_U128_DIRTY;
13516 }
13517 }
13518
13519 return AVX_U128_CLEAN;
13520 }
13521
13522 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
13523 Hardware changes state only when a 256bit register is written to,
13524 but we need to prevent the compiler from moving the optimal insertion
13525 point above an eventual read from a 256bit or 512bit register. */
13526 subrtx_iterator::array_type array;
13527 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13528 if (ix86_check_avx_upper_register (*iter))
13529 return AVX_U128_DIRTY;
13530
13531 return AVX_U128_ANY;
13532 }
13533
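/* Example of the effect of this bookkeeping (a sketch): in a function
   that computes in 256bit %ymm registers and then makes a call whose
   arguments are all scalar, the call's needed mode is AVX_U128_CLEAN
   while the state before it is DIRTY, so the mode switching pass
   inserts a vzeroupper ahead of the call and avoids the AVX/SSE
   transition penalty in the callee.  */
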
13534 /* Return mode that i387 must be switched into
13535 prior to the execution of insn. */
13536
13537 static int
13538 ix86_i387_mode_needed (int entity, rtx_insn *insn)
13539 {
13540 enum attr_i387_cw mode;
13541
13542 /* The mode UNINITIALIZED is used to store the control word after a
13543 function call or ASM pattern. The mode ANY specifies that the function
13544 has no requirements on the control word and makes no changes in the
13545 bits we are interested in. */
13546
13547 if (CALL_P (insn)
13548 || (NONJUMP_INSN_P (insn)
13549 && (asm_noperands (PATTERN (insn)) >= 0
13550 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13551 return I387_CW_UNINITIALIZED;
13552
13553 if (recog_memoized (insn) < 0)
13554 return I387_CW_ANY;
13555
13556 mode = get_attr_i387_cw (insn);
13557
13558 switch (entity)
13559 {
13560 case I387_TRUNC:
13561 if (mode == I387_CW_TRUNC)
13562 return mode;
13563 break;
13564
13565 case I387_FLOOR:
13566 if (mode == I387_CW_FLOOR)
13567 return mode;
13568 break;
13569
13570 case I387_CEIL:
13571 if (mode == I387_CW_CEIL)
13572 return mode;
13573 break;
13574
13575 default:
13576 gcc_unreachable ();
13577 }
13578
13579 return I387_CW_ANY;
13580 }
13581
13582 /* Return mode that entity must be switched into
13583 prior to the execution of insn. */
13584
13585 static int
13586 ix86_mode_needed (int entity, rtx_insn *insn)
13587 {
13588 switch (entity)
13589 {
13590 case X86_DIRFLAG:
13591 return ix86_dirflag_mode_needed (insn);
13592 case AVX_U128:
13593 return ix86_avx_u128_mode_needed (insn);
13594 case I387_TRUNC:
13595 case I387_FLOOR:
13596 case I387_CEIL:
13597 return ix86_i387_mode_needed (entity, insn);
13598 default:
13599 gcc_unreachable ();
13600 }
13601 return 0;
13602 }
13603
13604 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
13605
13606 static void
13607 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
13608 {
13609 if (ix86_check_avx_upper_register (dest))
13610 {
13611 bool *used = (bool *) data;
13612 *used = true;
13613 }
13614 }
13615
13616 /* Calculate mode of upper 128bit AVX registers after the insn. */
13617
13618 static int
13619 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
13620 {
13621 rtx pat = PATTERN (insn);
13622
13623 if (vzeroupper_pattern (pat, VOIDmode)
13624 || vzeroall_pattern (pat, VOIDmode))
13625 return AVX_U128_CLEAN;
13626
13627 /* We know that state is clean after CALL insn if there are no
13628 256bit or 512bit registers used in the function return register. */
13629 if (CALL_P (insn))
13630 {
13631 bool avx_upper_reg_found = false;
13632 note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
13633
13634 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
13635 }
13636
13637 /* Otherwise, return current mode. Remember that if insn
13638 references AVX 256bit or 512bit registers, the mode was already
13639 changed to DIRTY from MODE_NEEDED. */
13640 return mode;
13641 }
13642
13643 /* Return the mode that an insn results in. */
13644
13645 static int
13646 ix86_mode_after (int entity, int mode, rtx_insn *insn)
13647 {
13648 switch (entity)
13649 {
13650 case X86_DIRFLAG:
13651 return mode;
13652 case AVX_U128:
13653 return ix86_avx_u128_mode_after (mode, insn);
13654 case I387_TRUNC:
13655 case I387_FLOOR:
13656 case I387_CEIL:
13657 return mode;
13658 default:
13659 gcc_unreachable ();
13660 }
13661 }
13662
13663 static int
13664 ix86_dirflag_mode_entry (void)
13665 {
13666 /* For TARGET_CLD or in the interrupt handler we can't assume
13667 direction flag state at function entry. */
13668 if (TARGET_CLD
13669 || cfun->machine->func_type != TYPE_NORMAL)
13670 return X86_DIRFLAG_ANY;
13671
13672 return X86_DIRFLAG_RESET;
13673 }
13674
13675 static int
13676 ix86_avx_u128_mode_entry (void)
13677 {
13678 tree arg;
13679
13680 /* Entry mode is set to AVX_U128_DIRTY if there are
13681 256bit or 512bit modes used in function arguments. */
13682 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
13683 arg = TREE_CHAIN (arg))
13684 {
13685 rtx incoming = DECL_INCOMING_RTL (arg);
13686
13687 if (incoming && ix86_check_avx_upper_register (incoming))
13688 return AVX_U128_DIRTY;
13689 }
13690
13691 return AVX_U128_CLEAN;
13692 }
13693
13694 /* Return a mode that ENTITY is assumed to be
13695 switched to at function entry. */
13696
13697 static int
13698 ix86_mode_entry (int entity)
13699 {
13700 switch (entity)
13701 {
13702 case X86_DIRFLAG:
13703 return ix86_dirflag_mode_entry ();
13704 case AVX_U128:
13705 return ix86_avx_u128_mode_entry ();
13706 case I387_TRUNC:
13707 case I387_FLOOR:
13708 case I387_CEIL:
13709 return I387_CW_ANY;
13710 default:
13711 gcc_unreachable ();
13712 }
13713 }
13714
13715 static int
13716 ix86_avx_u128_mode_exit (void)
13717 {
13718 rtx reg = crtl->return_rtx;
13719
13720 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
13721 or 512bit modes used in the function return register. */
13722 if (reg && ix86_check_avx_upper_register (reg))
13723 return AVX_U128_DIRTY;
13724
13725 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
13726 modes used in function arguments, otherwise return AVX_U128_CLEAN.
13727 */
13728 return ix86_avx_u128_mode_entry ();
13729 }
13730
13731 /* Return a mode that ENTITY is assumed to be
13732 switched to at function exit. */
13733
13734 static int
13735 ix86_mode_exit (int entity)
13736 {
13737 switch (entity)
13738 {
13739 case X86_DIRFLAG:
13740 return X86_DIRFLAG_ANY;
13741 case AVX_U128:
13742 return ix86_avx_u128_mode_exit ();
13743 case I387_TRUNC:
13744 case I387_FLOOR:
13745 case I387_CEIL:
13746 return I387_CW_ANY;
13747 default:
13748 gcc_unreachable ();
13749 }
13750 }
13751
13752 static int
13753 ix86_mode_priority (int, int n)
13754 {
13755 return n;
13756 }
13757
13758 /* Output code to initialize control word copies used by trunc?f?i and
13759 rounding patterns. CURRENT_MODE is set to current control word,
13760 while NEW_MODE is set to new control word. */
13761
13762 static void
13763 emit_i387_cw_initialization (int mode)
13764 {
13765 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13766 rtx new_mode;
13767
13768 enum ix86_stack_slot slot;
13769
13770 rtx reg = gen_reg_rtx (HImode);
13771
13772 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13773 emit_move_insn (reg, copy_rtx (stored_mode));
13774
13775 switch (mode)
13776 {
13777 case I387_CW_TRUNC:
13778 /* round toward zero (truncate) */
13779 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13780 slot = SLOT_CW_TRUNC;
13781 break;
13782
13783 case I387_CW_FLOOR:
13784 /* round down toward -oo */
13785 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13786 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13787 slot = SLOT_CW_FLOOR;
13788 break;
13789
13790 case I387_CW_CEIL:
13791 /* round up toward +oo */
13792 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13793 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13794 slot = SLOT_CW_CEIL;
13795 break;
13796
13797 default:
13798 gcc_unreachable ();
13799 }
13800
13801 gcc_assert (slot < MAX_386_STACK_LOCALS);
13802
13803 new_mode = assign_386_stack_local (HImode, slot);
13804 emit_move_insn (new_mode, reg);
13805 }
13806
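/* For reference, the rounding control field occupies bits 11:10 of the
   x87 control word: 00 = round to nearest, 01 = round down, 10 = round
   up, 11 = truncate.  Hence the masks above: OR-ing 0x0c00 selects
   truncation, while clearing 0x0c00 and OR-ing 0x0400 or 0x0800
   selects the floor or ceiling mode respectively.  */
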
13807 /* Generate one or more insns to set ENTITY to MODE. */
13808
13809 static void
13810 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
13811 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
13812 {
13813 switch (entity)
13814 {
13815 case X86_DIRFLAG:
13816 if (mode == X86_DIRFLAG_RESET)
13817 emit_insn (gen_cld ());
13818 break;
13819 case AVX_U128:
13820 if (mode == AVX_U128_CLEAN)
13821 emit_insn (gen_avx_vzeroupper ());
13822 break;
13823 case I387_TRUNC:
13824 case I387_FLOOR:
13825 case I387_CEIL:
13826 if (mode != I387_CW_ANY
13827 && mode != I387_CW_UNINITIALIZED)
13828 emit_i387_cw_initialization (mode);
13829 break;
13830 default:
13831 gcc_unreachable ();
13832 }
13833 }
13834
13835 /* Output code for INSN to convert a float to a signed int. OPERANDS
13836 are the insn operands. The output may be [HSD]Imode and the input
13837 operand may be [SDX]Fmode. */
13838
13839 const char *
13840 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
13841 {
13842 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
13843 bool dimode_p = GET_MODE (operands[0]) == DImode;
13844 int round_mode = get_attr_i387_cw (insn);
13845
13846 static char buf[40];
13847 const char *p;
13848
13849 /* Jump through a hoop or two for DImode, since the hardware has no
13850 non-popping instruction. We used to do this a different way, but
13851 that was somewhat fragile and broke with post-reload splitters. */
13852 if ((dimode_p || fisttp) && !stack_top_dies)
13853 output_asm_insn ("fld\t%y1", operands);
13854
13855 gcc_assert (STACK_TOP_P (operands[1]));
13856 gcc_assert (MEM_P (operands[0]));
13857 gcc_assert (GET_MODE (operands[1]) != TFmode);
13858
13859 if (fisttp)
13860 return "fisttp%Z0\t%0";
13861
13862 strcpy (buf, "fist");
13863
13864 if (round_mode != I387_CW_ANY)
13865 output_asm_insn ("fldcw\t%3", operands);
13866
13867 p = "p%Z0\t%0";
13868 strcat (buf, p + !(stack_top_dies || dimode_p));
13869
13870 output_asm_insn (buf, operands);
13871
13872 if (round_mode != I387_CW_ANY)
13873 output_asm_insn ("fldcw\t%2", operands);
13874
13875 return "";
13876 }
13877
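/* An illustrative output sequence for the routine above: converting an
   x87 value to a DImode integer without fisttp typically emits
   "fldcw %3" to switch to truncation, a popping "fistp" store of %0,
   and "fldcw %2" to restore the previous control word, whereas the
   SSE3 fisttp form needs no control word juggling at all.  */
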
13878 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13879 have the values zero or one, indicates the ffreep insn's operand
13880 from the OPERANDS array. */
13881
13882 static const char *
13883 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13884 {
13885 if (TARGET_USE_FFREEP)
13886 #ifdef HAVE_AS_IX86_FFREEP
13887 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13888 #else
13889 {
13890 static char retval[32];
13891 int regno = REGNO (operands[opno]);
13892
13893 gcc_assert (STACK_REGNO_P (regno));
13894
13895 regno -= FIRST_STACK_REG;
13896
13897 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
13898 return retval;
13899 }
13900 #endif
13901
13902 return opno ? "fstp\t%y1" : "fstp\t%y0";
13903 }
13904
13905
13906 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13907 should be used. UNORDERED_P is true when fucom should be used. */
13908
13909 const char *
13910 output_fp_compare (rtx_insn *insn, rtx *operands,
13911 bool eflags_p, bool unordered_p)
13912 {
13913 rtx *xops = eflags_p ? &operands[0] : &operands[1];
13914 bool stack_top_dies;
13915
13916 static char buf[40];
13917 const char *p;
13918
13919 gcc_assert (STACK_TOP_P (xops[0]));
13920
13921 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
13922
13923 if (eflags_p)
13924 {
13925 p = unordered_p ? "fucomi" : "fcomi";
13926 strcpy (buf, p);
13927
13928 p = "p\t{%y1, %0|%0, %y1}";
13929 strcat (buf, p + !stack_top_dies);
13930
13931 return buf;
13932 }
13933
13934 if (STACK_REG_P (xops[1])
13935 && stack_top_dies
13936 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
13937 {
13938 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
13939
13940 /* If both the top of the 387 stack and the other operand (also
13941 a stack register) die, then this must be a `fcompp' float
13942 compare. */
13943 p = unordered_p ? "fucompp" : "fcompp";
13944 strcpy (buf, p);
13945 }
13946 else if (const0_operand (xops[1], VOIDmode))
13947 {
13948 gcc_assert (!unordered_p);
13949 strcpy (buf, "ftst");
13950 }
13951 else
13952 {
13953 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
13954 {
13955 gcc_assert (!unordered_p);
13956 p = "ficom";
13957 }
13958 else
13959 p = unordered_p ? "fucom" : "fcom";
13960
13961 strcpy (buf, p);
13962
13963 p = "p%Z2\t%y2";
13964 strcat (buf, p + !stack_top_dies);
13965 }
13966
13967 output_asm_insn (buf, operands);
13968 return "fnstsw\t%0";
13969 }
13970
13971 void
13972 ix86_output_addr_vec_elt (FILE *file, int value)
13973 {
13974 const char *directive = ASM_LONG;
13975
13976 #ifdef ASM_QUAD
13977 if (TARGET_LP64)
13978 directive = ASM_QUAD;
13979 #else
13980 gcc_assert (!TARGET_64BIT);
13981 #endif
13982
13983 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13984 }
13985
13986 void
13987 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13988 {
13989 const char *directive = ASM_LONG;
13990
13991 #ifdef ASM_QUAD
13992 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13993 directive = ASM_QUAD;
13994 #else
13995 gcc_assert (!TARGET_64BIT);
13996 #endif
13997 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13998 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13999 fprintf (file, "%s%s%d-%s%d\n",
14000 directive, LPREFIX, value, LPREFIX, rel);
14001 #if TARGET_MACHO
14002 else if (TARGET_MACHO)
14003 {
14004 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14005 machopic_output_function_base_name (file);
14006 putc ('\n', file);
14007 }
14008 #endif
14009 else if (HAVE_AS_GOTOFF_IN_DATA)
14010 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14011 else
14012 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14013 GOT_SYMBOL_NAME, LPREFIX, value);
14014 }
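
/* Illustrative output of the two routines above: an absolute jump table
   entry becomes ".quad .L4" on LP64 (".long .L4" otherwise), while a
   difference vector entry typically becomes ".long .L4-.L2"; the
   @GOTOFF and GOT-relative forms are used for PIC when the assembler
   supports them.  (The label numbers are placeholders.)  */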
14015 \f
14016 #define LEA_MAX_STALL (3)
14017 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14018
14019 /* Increase given DISTANCE in half-cycles according to
14020 dependencies between PREV and NEXT instructions.
14021 Add 1 half-cycle if there is no dependency and
14022 go to the next cycle if there is some dependency. */
14023
14024 static unsigned int
14025 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14026 {
14027 df_ref def, use;
14028
14029 if (!prev || !next)
14030 return distance + (distance & 1) + 2;
14031
14032 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14033 return distance + 1;
14034
14035 FOR_EACH_INSN_USE (use, next)
14036 FOR_EACH_INSN_DEF (def, prev)
14037 if (!DF_REF_IS_ARTIFICIAL (def)
14038 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14039 return distance + (distance & 1) + 2;
14040
14041 return distance + 1;
14042 }
14043
14044 /* Function checks if instruction INSN defines register number
14045 REGNO1 or REGNO2. */
14046
14047 bool
14048 insn_defines_reg (unsigned int regno1, unsigned int regno2,
14049 rtx_insn *insn)
14050 {
14051 df_ref def;
14052
14053 FOR_EACH_INSN_DEF (def, insn)
14054 if (DF_REF_REG_DEF_P (def)
14055 && !DF_REF_IS_ARTIFICIAL (def)
14056 && (regno1 == DF_REF_REGNO (def)
14057 || regno2 == DF_REF_REGNO (def)))
14058 return true;
14059
14060 return false;
14061 }
14062
14063 /* Function checks if instruction INSN uses register number
14064 REGNO as a part of address expression. */
14065
14066 static bool
14067 insn_uses_reg_mem (unsigned int regno, rtx insn)
14068 {
14069 df_ref use;
14070
14071 FOR_EACH_INSN_USE (use, insn)
14072 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
14073 return true;
14074
14075 return false;
14076 }
14077
14078 /* Search backward for non-agu definition of register number REGNO1
14079 or register number REGNO2 in basic block starting from instruction
14080 START up to head of basic block or instruction INSN.
14081
14082 Function puts true value into *FOUND var if definition was found
14083 and false otherwise.
14084
14085 Distance in half-cycles between START and found instruction or head
14086 of BB is added to DISTANCE and returned. */
14087
14088 static int
14089 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
14090 rtx_insn *insn, int distance,
14091 rtx_insn *start, bool *found)
14092 {
14093 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
14094 rtx_insn *prev = start;
14095 rtx_insn *next = NULL;
14096
14097 *found = false;
14098
14099 while (prev
14100 && prev != insn
14101 && distance < LEA_SEARCH_THRESHOLD)
14102 {
14103 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
14104 {
14105 distance = increase_distance (prev, next, distance);
14106 if (insn_defines_reg (regno1, regno2, prev))
14107 {
14108 if (recog_memoized (prev) < 0
14109 || get_attr_type (prev) != TYPE_LEA)
14110 {
14111 *found = true;
14112 return distance;
14113 }
14114 }
14115
14116 next = prev;
14117 }
14118 if (prev == BB_HEAD (bb))
14119 break;
14120
14121 prev = PREV_INSN (prev);
14122 }
14123
14124 return distance;
14125 }
14126
14127 /* Search backward for non-agu definition of register number REGNO1
14128 or register number REGNO2 in INSN's basic block until
14129 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14130 2. Reach neighbor BBs boundary, or
14131 3. Reach agu definition.
14132 Returns the distance between the non-agu definition point and INSN.
14133 If no definition point, returns -1. */
14134
14135 static int
14136 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14137 rtx_insn *insn)
14138 {
14139 basic_block bb = BLOCK_FOR_INSN (insn);
14140 int distance = 0;
14141 bool found = false;
14142
14143 if (insn != BB_HEAD (bb))
14144 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
14145 distance, PREV_INSN (insn),
14146 &found);
14147
14148 if (!found && distance < LEA_SEARCH_THRESHOLD)
14149 {
14150 edge e;
14151 edge_iterator ei;
14152 bool simple_loop = false;
14153
14154 FOR_EACH_EDGE (e, ei, bb->preds)
14155 if (e->src == bb)
14156 {
14157 simple_loop = true;
14158 break;
14159 }
14160
14161 if (simple_loop)
14162 distance = distance_non_agu_define_in_bb (regno1, regno2,
14163 insn, distance,
14164 BB_END (bb), &found);
14165 else
14166 {
14167 int shortest_dist = -1;
14168 bool found_in_bb = false;
14169
14170 FOR_EACH_EDGE (e, ei, bb->preds)
14171 {
14172 int bb_dist
14173 = distance_non_agu_define_in_bb (regno1, regno2,
14174 insn, distance,
14175 BB_END (e->src),
14176 &found_in_bb);
14177 if (found_in_bb)
14178 {
14179 if (shortest_dist < 0)
14180 shortest_dist = bb_dist;
14181 else if (bb_dist > 0)
14182 shortest_dist = MIN (bb_dist, shortest_dist);
14183
14184 found = true;
14185 }
14186 }
14187
14188 distance = shortest_dist;
14189 }
14190 }
14191
14192 /* get_attr_type may modify recog data. We want to make sure
14193 that recog data is valid for instruction INSN, on which
14194 distance_non_agu_define is called. INSN is unchanged here. */
14195 extract_insn_cached (insn);
14196
14197 if (!found)
14198 return -1;
14199
14200 return distance >> 1;
14201 }
14202
14203 /* Return the distance in half-cycles between INSN and the next
14204 insn that uses register number REGNO in a memory address, added
14205 to DISTANCE. Return -1 if REGNO is set.
14206
14207 Put true value into *FOUND if register usage was found and
14208 false otherwise.
14209 Put true value into *REDEFINED if register redefinition was
14210 found and false otherwise. */
14211
14212 static int
14213 distance_agu_use_in_bb (unsigned int regno,
14214 rtx_insn *insn, int distance, rtx_insn *start,
14215 bool *found, bool *redefined)
14216 {
14217 basic_block bb = NULL;
14218 rtx_insn *next = start;
14219 rtx_insn *prev = NULL;
14220
14221 *found = false;
14222 *redefined = false;
14223
14224 if (start != NULL_RTX)
14225 {
14226 bb = BLOCK_FOR_INSN (start);
14227 if (start != BB_HEAD (bb))
14228 /* If insn and start belong to the same bb, set prev to insn,
14229 so the call to increase_distance will increase the distance
14230 between insns by 1. */
14231 prev = insn;
14232 }
14233
14234 while (next
14235 && next != insn
14236 && distance < LEA_SEARCH_THRESHOLD)
14237 {
14238 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
14239 {
14240 distance = increase_distance(prev, next, distance);
14241 if (insn_uses_reg_mem (regno, next))
14242 {
14243 /* Return DISTANCE if OP0 is used in memory
14244 address in NEXT. */
14245 *found = true;
14246 return distance;
14247 }
14248
14249 if (insn_defines_reg (regno, INVALID_REGNUM, next))
14250 {
14251 /* Return -1 if OP0 is set in NEXT. */
14252 *redefined = true;
14253 return -1;
14254 }
14255
14256 prev = next;
14257 }
14258
14259 if (next == BB_END (bb))
14260 break;
14261
14262 next = NEXT_INSN (next);
14263 }
14264
14265 return distance;
14266 }
14267
14268 /* Return the distance between INSN and the next insn that uses
14269 register number REGNO0 in a memory address. Return -1 if no such
14270 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14271
14272 static int
14273 distance_agu_use (unsigned int regno0, rtx_insn *insn)
14274 {
14275 basic_block bb = BLOCK_FOR_INSN (insn);
14276 int distance = 0;
14277 bool found = false;
14278 bool redefined = false;
14279
14280 if (insn != BB_END (bb))
14281 distance = distance_agu_use_in_bb (regno0, insn, distance,
14282 NEXT_INSN (insn),
14283 &found, &redefined);
14284
14285 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
14286 {
14287 edge e;
14288 edge_iterator ei;
14289 bool simple_loop = false;
14290
14291 FOR_EACH_EDGE (e, ei, bb->succs)
14292 if (e->dest == bb)
14293 {
14294 simple_loop = true;
14295 break;
14296 }
14297
14298 if (simple_loop)
14299 distance = distance_agu_use_in_bb (regno0, insn,
14300 distance, BB_HEAD (bb),
14301 &found, &redefined);
14302 else
14303 {
14304 int shortest_dist = -1;
14305 bool found_in_bb = false;
14306 bool redefined_in_bb = false;
14307
14308 FOR_EACH_EDGE (e, ei, bb->succs)
14309 {
14310 int bb_dist
14311 = distance_agu_use_in_bb (regno0, insn,
14312 distance, BB_HEAD (e->dest),
14313 &found_in_bb, &redefined_in_bb);
14314 if (found_in_bb)
14315 {
14316 if (shortest_dist < 0)
14317 shortest_dist = bb_dist;
14318 else if (bb_dist > 0)
14319 shortest_dist = MIN (bb_dist, shortest_dist);
14320
14321 found = true;
14322 }
14323 }
14324
14325 distance = shortest_dist;
14326 }
14327 }
14328
14329 if (!found || redefined)
14330 return -1;
14331
14332 return distance >> 1;
14333 }
14334
14335 /* Define this macro to tune LEA priority vs ADD; it takes effect when
14336 there is a dilemma of choosing LEA or ADD.
14337 Negative value: ADD is preferred over LEA
14338 Zero: Neutral
14339 Positive value: LEA is preferred over ADD */
14340 #define IX86_LEA_PRIORITY 0
14341
14342 /* Return true if usage of the lea INSN has a performance advantage
14343 over a sequence of instructions. The instruction sequence has
14344 SPLIT_COST cycles higher latency than the lea latency. */
14345
14346 static bool
14347 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
14348 unsigned int regno2, int split_cost, bool has_scale)
14349 {
14350 int dist_define, dist_use;
14351
14352 /* For Silvermont, if a 2-source or 3-source LEA is used for
14353 non-destructive destination purposes, or for the ability to
14354 use SCALE, the use of LEA is justified. */
14355 if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
14356 || TARGET_TREMONT || TARGET_INTEL)
14357 {
14358 if (has_scale)
14359 return true;
14360 if (split_cost < 1)
14361 return false;
14362 if (regno0 == regno1 || regno0 == regno2)
14363 return false;
14364 return true;
14365 }
14366
14367 dist_define = distance_non_agu_define (regno1, regno2, insn);
14368 dist_use = distance_agu_use (regno0, insn);
14369
14370 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
14371 {
14372 /* If there is no non-AGU operand definition, no AGU
14373 operand usage and the split cost is 0, then both the lea
14374 and non-lea variants have the same priority. Currently
14375 we prefer lea for 64-bit code and non-lea for 32-bit
14376 code. */
14377 if (dist_use < 0 && split_cost == 0)
14378 return TARGET_64BIT || IX86_LEA_PRIORITY;
14379 else
14380 return true;
14381 }
14382
14383 /* With a longer definition distance, lea is preferable.
14384 Here we adjust it to take into account the splitting cost and
14385 lea priority. */
14386 dist_define += split_cost + IX86_LEA_PRIORITY;
14387
14388 /* If there is no use in a memory address then we just check
14389 that the split cost exceeds the AGU stall. */
14390 if (dist_use < 0)
14391 return dist_define > LEA_MAX_STALL;
14392
14393 /* If this insn has both a backward non-agu dependence and a forward
14394 agu dependence, the one with the shorter distance takes effect. */
14395 return dist_define >= dist_use;
14396 }
14397
14398 /* Return true if it is legal to clobber flags by INSN and
14399 false otherwise. */
14400
14401 static bool
14402 ix86_ok_to_clobber_flags (rtx_insn *insn)
14403 {
14404 basic_block bb = BLOCK_FOR_INSN (insn);
14405 df_ref use;
14406 bitmap live;
14407
14408 while (insn)
14409 {
14410 if (NONDEBUG_INSN_P (insn))
14411 {
14412 FOR_EACH_INSN_USE (use, insn)
14413 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
14414 return false;
14415
14416 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
14417 return true;
14418 }
14419
14420 if (insn == BB_END (bb))
14421 break;
14422
14423 insn = NEXT_INSN (insn);
14424 }
14425
14426 live = df_get_live_out(bb);
14427 return !REGNO_REG_SET_P (live, FLAGS_REG);
14428 }
14429
14430 /* Return true if we need to split op0 = op1 + op2 into a sequence of
14431 move and add to avoid AGU stalls. */
14432
14433 bool
14434 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
14435 {
14436 unsigned int regno0, regno1, regno2;
14437
14438 /* Check if we need to optimize. */
14439 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14440 return false;
14441
14442 /* Check it is correct to split here. */
14443 if (!ix86_ok_to_clobber_flags(insn))
14444 return false;
14445
14446 regno0 = true_regnum (operands[0]);
14447 regno1 = true_regnum (operands[1]);
14448 regno2 = true_regnum (operands[2]);
14449
14450 /* We need to split only adds with a non-destructive
14451 destination operand. */
14452 if (regno0 == regno1 || regno0 == regno2)
14453 return false;
14454 else
14455 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
14456 }
14457
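/* A sketch of what the split looks like when the function above returns
   true: an op0 = op1 + op2 add that would otherwise be a three-operand
   lea is instead emitted as a register move of op1 into op0 followed by
   an add of op2, e.g. "movl %esi, %eax" then "addl %edi, %eax", trading
   the lea's AGU latency for two simple ALU operations when no nearby
   address use benefits from the lea form.  */
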
14458 /* Return true if we should emit lea instruction instead of mov
14459 instruction. */
14460
14461 bool
14462 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
14463 {
14464 unsigned int regno0, regno1;
14465
14466 /* Check if we need to optimize. */
14467 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14468 return false;
14469
14470 /* Use lea for reg to reg moves only. */
14471 if (!REG_P (operands[0]) || !REG_P (operands[1]))
14472 return false;
14473
14474 regno0 = true_regnum (operands[0]);
14475 regno1 = true_regnum (operands[1]);
14476
14477 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
14478 }
14479
14480 /* Return true if we need to split lea into a sequence of
14481 instructions to avoid AGU stalls. */
14482
14483 bool
14484 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
14485 {
14486 unsigned int regno0, regno1, regno2;
14487 int split_cost;
14488 struct ix86_address parts;
14489 int ok;
14490
14491 /* Check if we need to optimize. */
14492 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
14493 return false;
14494
14495 /* The "at least two components" test below might not catch simple
14496 move or zero extension insns if parts.base is non-NULL and parts.disp
14497 is const0_rtx as the only components in the address, e.g. if the
14498 register is %rbp or %r13. As this test is much cheaper and moves or
14499 zero extensions are the common case, do this check first. */
14500 if (REG_P (operands[1])
14501 || (SImode_address_operand (operands[1], VOIDmode)
14502 && REG_P (XEXP (operands[1], 0))))
14503 return false;
14504
14505 /* Check if it is OK to split here. */
14506 if (!ix86_ok_to_clobber_flags (insn))
14507 return false;
14508
14509 ok = ix86_decompose_address (operands[1], &parts);
14510 gcc_assert (ok);
14511
14512 /* There should be at least two components in the address. */
14513 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
14514 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
14515 return false;
14516
14517 /* We should not split into add if a non-legitimate pic
14518 operand is used as the displacement. */
14519 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
14520 return false;
14521
14522 regno0 = true_regnum (operands[0]);
14523 regno1 = INVALID_REGNUM;
14524 regno2 = INVALID_REGNUM;
14525
14526 if (parts.base)
14527 regno1 = true_regnum (parts.base);
14528 if (parts.index)
14529 regno2 = true_regnum (parts.index);
14530
14531 split_cost = 0;
14532
14533 /* Compute how many cycles we will add to the execution time
14534 if we split the lea into a sequence of instructions. */
14535 if (parts.base || parts.index)
14536 {
14537 /* Have to use a mov instruction if the non-destructive
14538 destination form is used. */
14539 if (regno1 != regno0 && regno2 != regno0)
14540 split_cost += 1;
14541
14542 /* Have to add index to base if both exist. */
14543 if (parts.base && parts.index)
14544 split_cost += 1;
14545
14546 /* Have to use shift and adds if scale is 2 or greater. */
14547 if (parts.scale > 1)
14548 {
14549 if (regno0 != regno1)
14550 split_cost += 1;
14551 else if (regno2 == regno0)
14552 split_cost += 4;
14553 else
14554 split_cost += parts.scale;
14555 }
14556
14557 /* Have to use an add instruction with an immediate if
14558 disp is non-zero. */
14559 if (parts.disp && parts.disp != const0_rtx)
14560 split_cost += 1;
14561
14562 /* Subtract the price of lea. */
14563 split_cost -= 1;
14564 }
14565
14566 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
14567 parts.scale > 1);
14568 }
14569
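/* A rough walk-through of the cost model above (illustrative numbers):
   for "lea 4(%rbx,%rcx,2), %rax" the split needs a mov since the
   destination differs from both sources (+1), an add of index to base
   (+1), handling of the scale (+1 in this case), and an add of the
   displacement (+1); subtracting 1 for the lea that is saved gives a
   split cost of 3, which ix86_lea_outperforms then weighs against the
   observed AGU distances.  */
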
14570 /* Return true if it is ok to optimize an ADD operation to an LEA
14571 operation to avoid flag register consumption. For most processors,
14572 ADD is faster than LEA. For processors like BONNELL, if the
14573 destination register of the LEA holds an actual address which will be
14574 used soon, LEA is better; otherwise ADD is better. */
14575
14576 bool
14577 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
14578 {
14579 unsigned int regno0 = true_regnum (operands[0]);
14580 unsigned int regno1 = true_regnum (operands[1]);
14581 unsigned int regno2 = true_regnum (operands[2]);
14582
14583 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14584 if (regno0 != regno1 && regno0 != regno2)
14585 return true;
14586
14587 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14588 return false;
14589
14590 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
14591 }
14592
14593 /* Return true if destination reg of SET_BODY is shift count of
14594 USE_BODY. */
14595
14596 static bool
14597 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14598 {
14599 rtx set_dest;
14600 rtx shift_rtx;
14601 int i;
14602
14603 /* Retrieve destination of SET_BODY. */
14604 switch (GET_CODE (set_body))
14605 {
14606 case SET:
14607 set_dest = SET_DEST (set_body);
14608 if (!set_dest || !REG_P (set_dest))
14609 return false;
14610 break;
14611 case PARALLEL:
14612 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14613 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14614 use_body))
14615 return true;
14616 /* FALLTHROUGH */
14617 default:
14618 return false;
14619 }
14620
14621 /* Retrieve shift count of USE_BODY. */
14622 switch (GET_CODE (use_body))
14623 {
14624 case SET:
14625 shift_rtx = XEXP (use_body, 1);
14626 break;
14627 case PARALLEL:
14628 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14629 if (ix86_dep_by_shift_count_body (set_body,
14630 XVECEXP (use_body, 0, i)))
14631 return true;
14632 /* FALLTHROUGH */
14633 default:
14634 return false;
14635 }
14636
14637 if (shift_rtx
14638 && (GET_CODE (shift_rtx) == ASHIFT
14639 || GET_CODE (shift_rtx) == LSHIFTRT
14640 || GET_CODE (shift_rtx) == ASHIFTRT
14641 || GET_CODE (shift_rtx) == ROTATE
14642 || GET_CODE (shift_rtx) == ROTATERT))
14643 {
14644 rtx shift_count = XEXP (shift_rtx, 1);
14645
14646 /* Return true if shift count is dest of SET_BODY. */
14647 if (REG_P (shift_count))
14648 {
14649 /* Add check since it can be invoked before register
14650 allocation in pre-reload schedule. */
14651 if (reload_completed
14652 && true_regnum (set_dest) == true_regnum (shift_count))
14653 return true;
14654 else if (REGNO(set_dest) == REGNO(shift_count))
14655 return true;
14656 }
14657 }
14658
14659 return false;
14660 }
14661
14662 /* Return true if destination reg of SET_INSN is shift count of
14663 USE_INSN. */
14664
14665 bool
14666 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14667 {
14668 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14669 PATTERN (use_insn));
14670 }
14671
14672 /* Return TRUE or FALSE depending on whether the unary operator meets the
14673 appropriate constraints. */
14674
14675 bool
14676 ix86_unary_operator_ok (enum rtx_code,
14677 machine_mode,
14678 rtx operands[2])
14679 {
14680 /* If one of the operands is memory, source and destination must match. */
14681 if ((MEM_P (operands[0])
14682 || MEM_P (operands[1]))
14683 && ! rtx_equal_p (operands[0], operands[1]))
14684 return false;
14685 return true;
14686 }
14687
14688 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14689 are ok, keeping in mind the possible movddup alternative. */
14690
14691 bool
14692 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14693 {
14694 if (MEM_P (operands[0]))
14695 return rtx_equal_p (operands[0], operands[1 + high]);
14696 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14697 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14698 return true;
14699 }
14700
14701 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14702 then replicate the value for all elements of the vector
14703 register. */
14704
14705 rtx
14706 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
14707 {
14708 int i, n_elt;
14709 rtvec v;
14710 machine_mode scalar_mode;
14711
14712 switch (mode)
14713 {
14714 case E_V64QImode:
14715 case E_V32QImode:
14716 case E_V16QImode:
14717 case E_V32HImode:
14718 case E_V16HImode:
14719 case E_V8HImode:
14720 case E_V16SImode:
14721 case E_V8SImode:
14722 case E_V4SImode:
14723 case E_V8DImode:
14724 case E_V4DImode:
14725 case E_V2DImode:
14726 gcc_assert (vect);
14727 /* FALLTHRU */
14728 case E_V16SFmode:
14729 case E_V8SFmode:
14730 case E_V4SFmode:
14731 case E_V8DFmode:
14732 case E_V4DFmode:
14733 case E_V2DFmode:
14734 n_elt = GET_MODE_NUNITS (mode);
14735 v = rtvec_alloc (n_elt);
14736 scalar_mode = GET_MODE_INNER (mode);
14737
14738 RTVEC_ELT (v, 0) = value;
14739
14740 for (i = 1; i < n_elt; ++i)
14741 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
14742
14743 return gen_rtx_CONST_VECTOR (mode, v);
14744
14745 default:
14746 gcc_unreachable ();
14747 }
14748 }
14749
14750 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14751 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14752 for an SSE register. If VECT is true, then replicate the mask for
14753 all elements of the vector register. If INVERT is true, then create
14754 a mask excluding the sign bit. */
14755
14756 rtx
14757 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
14758 {
14759 machine_mode vec_mode, imode;
14760 wide_int w;
14761 rtx mask, v;
14762
14763 switch (mode)
14764 {
14765 case E_V16SImode:
14766 case E_V16SFmode:
14767 case E_V8SImode:
14768 case E_V4SImode:
14769 case E_V8SFmode:
14770 case E_V4SFmode:
14771 vec_mode = mode;
14772 imode = SImode;
14773 break;
14774
14775 case E_V8DImode:
14776 case E_V4DImode:
14777 case E_V2DImode:
14778 case E_V8DFmode:
14779 case E_V4DFmode:
14780 case E_V2DFmode:
14781 vec_mode = mode;
14782 imode = DImode;
14783 break;
14784
14785 case E_TImode:
14786 case E_TFmode:
14787 vec_mode = VOIDmode;
14788 imode = TImode;
14789 break;
14790
14791 default:
14792 gcc_unreachable ();
14793 }
14794
14795 machine_mode inner_mode = GET_MODE_INNER (mode);
14796 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
14797 GET_MODE_BITSIZE (inner_mode));
14798 if (invert)
14799 w = wi::bit_not (w);
14800
14801 /* Force this value into the low part of a fp vector constant. */
14802 mask = immed_wide_int_const (w, imode);
14803 mask = gen_lowpart (inner_mode, mask);
14804
14805 if (vec_mode == VOIDmode)
14806 return force_reg (inner_mode, mask);
14807
14808 v = ix86_build_const_vector (vec_mode, vect, mask);
14809 return force_reg (vec_mode, v);
14810 }
14811
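/* Example masks produced above (for illustration): for V4SFmode the
   non-inverted mask is four copies of 0x80000000, suitable for flipping
   the sign with xorps, and the inverted mask is four copies of
   0x7fffffff, suitable for clearing the sign bit with andps in an
   absolute-value expansion.  */
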
14812 /* Return TRUE or FALSE depending on whether the first SET in INSN
14813 has source and destination with matching CC modes, and that the
14814 CC mode is at least as constrained as REQ_MODE. */
14815
14816 bool
14817 ix86_match_ccmode (rtx insn, machine_mode req_mode)
14818 {
14819 rtx set;
14820 machine_mode set_mode;
14821
14822 set = PATTERN (insn);
14823 if (GET_CODE (set) == PARALLEL)
14824 set = XVECEXP (set, 0, 0);
14825 gcc_assert (GET_CODE (set) == SET);
14826 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14827
14828 set_mode = GET_MODE (SET_DEST (set));
14829 switch (set_mode)
14830 {
14831 case E_CCNOmode:
14832 if (req_mode != CCNOmode
14833 && (req_mode != CCmode
14834 || XEXP (SET_SRC (set), 1) != const0_rtx))
14835 return false;
14836 break;
14837 case E_CCmode:
14838 if (req_mode == CCGCmode)
14839 return false;
14840 /* FALLTHRU */
14841 case E_CCGCmode:
14842 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14843 return false;
14844 /* FALLTHRU */
14845 case E_CCGOCmode:
14846 if (req_mode == CCZmode)
14847 return false;
14848 /* FALLTHRU */
14849 case E_CCZmode:
14850 break;
14851
14852 case E_CCGZmode:
14853
14854 case E_CCAmode:
14855 case E_CCCmode:
14856 case E_CCOmode:
14857 case E_CCPmode:
14858 case E_CCSmode:
14859 if (set_mode != req_mode)
14860 return false;
14861 break;
14862
14863 default:
14864 gcc_unreachable ();
14865 }
14866
14867 return GET_MODE (SET_SRC (set)) == set_mode;
14868 }
14869
14870 machine_mode
14871 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14872 {
14873 machine_mode mode = GET_MODE (op0);
14874
14875 if (SCALAR_FLOAT_MODE_P (mode))
14876 {
14877 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14878 return CCFPmode;
14879 }
14880
14881 switch (code)
14882 {
14883 /* Only zero flag is needed. */
14884 case EQ: /* ZF=0 */
14885 case NE: /* ZF!=0 */
14886 return CCZmode;
14887 /* Codes needing carry flag. */
14888 case GEU: /* CF=0 */
14889 case LTU: /* CF=1 */
14890 /* Detect overflow checks. They need just the carry flag. */
14891 if (GET_CODE (op0) == PLUS
14892 && (rtx_equal_p (op1, XEXP (op0, 0))
14893 || rtx_equal_p (op1, XEXP (op0, 1))))
14894 return CCCmode;
14895 else
14896 return CCmode;
14897 case GTU: /* CF=0 & ZF=0 */
14898 case LEU: /* CF=1 | ZF=1 */
14899 return CCmode;
14900 /* Codes possibly doable only with sign flag when
14901 comparing against zero. */
14902 case GE: /* SF=OF or SF=0 */
14903 case LT: /* SF<>OF or SF=1 */
14904 if (op1 == const0_rtx)
14905 return CCGOCmode;
14906 else
14907 /* For other cases Carry flag is not required. */
14908 return CCGCmode;
14909 /* Codes doable only with the sign flag when comparing
14910 against zero, but we miss the jump instruction for it
14911 so we need to use relational tests against the overflow
14912 flag, which thus needs to be zero. */
14913 case GT: /* ZF=0 & SF=OF */
14914 case LE: /* ZF=1 | SF<>OF */
14915 if (op1 == const0_rtx)
14916 return CCNOmode;
14917 else
14918 return CCGCmode;
14919 /* The strcmp pattern does (use flags) and combine may ask us for the
14920 proper mode. */
14921 case USE:
14922 return CCmode;
14923 default:
14924 gcc_unreachable ();
14925 }
14926 }
14927
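/* A few concrete mappings from the selection above (illustrative): a
   plain equality test only needs ZF and gets CCZmode; an unsigned
   overflow check written as "a + b < a" compares a PLUS against one of
   its operands and gets CCCmode, so only the carry flag is tracked; and
   a signed "x < 0" gets CCGOCmode because the sign flag alone decides
   it.  */
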
14928 /* Return the fixed registers used for condition codes. */
14929
14930 static bool
14931 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14932 {
14933 *p1 = FLAGS_REG;
14934 *p2 = INVALID_REGNUM;
14935 return true;
14936 }
14937
14938 /* If two condition code modes are compatible, return a condition code
14939 mode which is compatible with both. Otherwise, return
14940 VOIDmode. */
14941
14942 static machine_mode
14943 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
14944 {
14945 if (m1 == m2)
14946 return m1;
14947
14948 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14949 return VOIDmode;
14950
14951 if ((m1 == CCGCmode && m2 == CCGOCmode)
14952 || (m1 == CCGOCmode && m2 == CCGCmode))
14953 return CCGCmode;
14954
14955 if ((m1 == CCNOmode && m2 == CCGOCmode)
14956 || (m1 == CCGOCmode && m2 == CCNOmode))
14957 return CCNOmode;
14958
14959 if (m1 == CCZmode
14960 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
14961 return m2;
14962 else if (m2 == CCZmode
14963 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
14964 return m1;
14965
14966 switch (m1)
14967 {
14968 default:
14969 gcc_unreachable ();
14970
14971 case E_CCmode:
14972 case E_CCGCmode:
14973 case E_CCGOCmode:
14974 case E_CCNOmode:
14975 case E_CCAmode:
14976 case E_CCCmode:
14977 case E_CCOmode:
14978 case E_CCPmode:
14979 case E_CCSmode:
14980 case E_CCZmode:
14981 switch (m2)
14982 {
14983 default:
14984 return VOIDmode;
14985
14986 case E_CCmode:
14987 case E_CCGCmode:
14988 case E_CCGOCmode:
14989 case E_CCNOmode:
14990 case E_CCAmode:
14991 case E_CCCmode:
14992 case E_CCOmode:
14993 case E_CCPmode:
14994 case E_CCSmode:
14995 case E_CCZmode:
14996 return CCmode;
14997 }
14998
14999 case E_CCFPmode:
15000 /* These are only compatible with themselves, which we already
15001 checked above. */
15002 return VOIDmode;
15003 }
15004 }
15005
15006 /* Return the strategy to use for floating-point comparisons. We assume that
15007 fcomi is always preferable where available, since that is also true when
15008 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15009
15010 enum ix86_fpcmp_strategy
15011 ix86_fp_comparison_strategy (enum rtx_code)
15012 {
15013 /* Do fcomi/sahf based test when profitable. */
15014
15015 if (TARGET_CMOVE)
15016 return IX86_FPCMP_COMI;
15017
15018 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15019 return IX86_FPCMP_SAHF;
15020
15021 return IX86_FPCMP_ARITH;
15022 }
15023
15024 /* Convert comparison codes we use to represent FP comparison to integer
15025 code that will result in proper branch. Return UNKNOWN if no such code
15026 is available. */
15027
15028 enum rtx_code
15029 ix86_fp_compare_code_to_integer (enum rtx_code code)
15030 {
15031 switch (code)
15032 {
15033 case GT:
15034 return GTU;
15035 case GE:
15036 return GEU;
15037 case ORDERED:
15038 case UNORDERED:
15039 return code;
15040 case UNEQ:
15041 return EQ;
15042 case UNLT:
15043 return LTU;
15044 case UNLE:
15045 return LEU;
15046 case LTGT:
15047 return NE;
15048 default:
15049 return UNKNOWN;
15050 }
15051 }
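/* A minimal sketch of why the mapping above works: after a (u)comis[sd]
   comparison the integer flags are set as

     unordered:  ZF=1 CF=1 PF=1
     greater:    ZF=0 CF=0
     less:       ZF=0 CF=1
     equal:      ZF=1 CF=0

   so, for example, GT maps to GTU (ZF=0 & CF=0), UNLT maps to LTU (CF=1)
   and UNEQ maps to EQ (ZF=1).  */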
15052
15053 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
15054 rtx
15055 ix86_zero_extend_to_Pmode (rtx exp)
15056 {
15057 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
15058 }
15059
15060 /* Return true if the function being called was marked with attribute
15061 "noplt", or -fno-plt is in effect and we are compiling for non-PIC. We
15062 need to handle the non-PIC case in the backend because there is no easy
15063 interface for the front-end to force non-PLT calls to use the GOT.
15064 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15065 to call the function marked "noplt" indirectly. */
15066
15067 static bool
15068 ix86_nopic_noplt_attribute_p (rtx call_op)
15069 {
15070 if (flag_pic || ix86_cmodel == CM_LARGE
15071 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15072 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
15073 || SYMBOL_REF_LOCAL_P (call_op))
15074 return false;
15075
15076 tree symbol_decl = SYMBOL_REF_DECL (call_op);
15077
15078 if (!flag_plt
15079 || (symbol_decl != NULL_TREE
15080 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
15081 return true;
15082
15083 return false;
15084 }
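/* As an illustration (the declaration below is hypothetical and not part of
   this file), a call to

     extern void foo (void) __attribute__ ((noplt));

   compiled without -fPIC on a 64-bit ELF target is emitted as an indirect
   call through the GOT, e.g. "call *foo@GOTPCREL(%rip)", instead of a
   direct call through the PLT; see ix86_output_call_insn below.  */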
15085
15086 /* Helper to output the jmp/call. */
15087 static void
15088 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
15089 {
15090 if (thunk_name != NULL)
15091 {
15092 fprintf (asm_out_file, "\tjmp\t");
15093 assemble_name (asm_out_file, thunk_name);
15094 putc ('\n', asm_out_file);
15095 }
15096 else
15097 output_indirect_thunk (regno);
15098 }
15099
15100 /* Output indirect branch via a call and return thunk. CALL_OP is a
15101 register which contains the branch target. XASM is the assembly
15102 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
15103 A normal call is converted to:
15104
15105 call __x86_indirect_thunk_reg
15106
15107 and a tail call is converted to:
15108
15109 jmp __x86_indirect_thunk_reg
15110 */
15111
15112 static void
15113 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
15114 {
15115 char thunk_name_buf[32];
15116 char *thunk_name;
15117 enum indirect_thunk_prefix need_prefix
15118 = indirect_thunk_need_prefix (current_output_insn);
15119 int regno = REGNO (call_op);
15120
15121 if (cfun->machine->indirect_branch_type
15122 != indirect_branch_thunk_inline)
15123 {
15124 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15125 {
15126 int i = regno;
15127 if (i >= FIRST_REX_INT_REG)
15128 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
15129 indirect_thunks_used |= 1 << i;
15130 }
15131 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15132 thunk_name = thunk_name_buf;
15133 }
15134 else
15135 thunk_name = NULL;
15136
15137 if (sibcall_p)
15138 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15139 else
15140 {
15141 if (thunk_name != NULL)
15142 {
15143 fprintf (asm_out_file, "\tcall\t");
15144 assemble_name (asm_out_file, thunk_name);
15145 putc ('\n', asm_out_file);
15146 return;
15147 }
15148
15149 char indirectlabel1[32];
15150 char indirectlabel2[32];
15151
15152 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15153 INDIRECT_LABEL,
15154 indirectlabelno++);
15155 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15156 INDIRECT_LABEL,
15157 indirectlabelno++);
15158
15159 /* Jump. */
15160 fputs ("\tjmp\t", asm_out_file);
15161 assemble_name_raw (asm_out_file, indirectlabel2);
15162 fputc ('\n', asm_out_file);
15163
15164 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15165
15166 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15167
15168 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15169
15170 /* Call. */
15171 fputs ("\tcall\t", asm_out_file);
15172 assemble_name_raw (asm_out_file, indirectlabel1);
15173 fputc ('\n', asm_out_file);
15174 }
15175 }
15176
15177 /* Output indirect branch via a call and return thunk. CALL_OP is
15178 the branch target. XASM is the assembly template for CALL_OP.
15179 Branch is a tail call if SIBCALL_P is true. A normal call is
15180 converted to:
15181
15182 jmp L2
15183 L1:
15184 push CALL_OP
15185 jmp __x86_indirect_thunk
15186 L2:
15187 call L1
15188
15189 and a tail call is converted to:
15190
15191 push CALL_OP
15192 jmp __x86_indirect_thunk
15193 */
15194
15195 static void
15196 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
15197 bool sibcall_p)
15198 {
15199 char thunk_name_buf[32];
15200 char *thunk_name;
15201 char push_buf[64];
15202 enum indirect_thunk_prefix need_prefix
15203 = indirect_thunk_need_prefix (current_output_insn);
15204 int regno = -1;
15205
15206 if (cfun->machine->indirect_branch_type
15207 != indirect_branch_thunk_inline)
15208 {
15209 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15210 indirect_thunk_needed = true;
15211 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15212 thunk_name = thunk_name_buf;
15213 }
15214 else
15215 thunk_name = NULL;
15216
15217 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
15218 TARGET_64BIT ? 'q' : 'l', xasm);
15219
15220 if (sibcall_p)
15221 {
15222 output_asm_insn (push_buf, &call_op);
15223 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15224 }
15225 else
15226 {
15227 char indirectlabel1[32];
15228 char indirectlabel2[32];
15229
15230 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15231 INDIRECT_LABEL,
15232 indirectlabelno++);
15233 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15234 INDIRECT_LABEL,
15235 indirectlabelno++);
15236
15237 /* Jump. */
15238 fputs ("\tjmp\t", asm_out_file);
15239 assemble_name_raw (asm_out_file, indirectlabel2);
15240 fputc ('\n', asm_out_file);
15241
15242 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15243
15244 /* An external function may be called via GOT, instead of PLT. */
15245 if (MEM_P (call_op))
15246 {
15247 struct ix86_address parts;
15248 rtx addr = XEXP (call_op, 0);
15249 if (ix86_decompose_address (addr, &parts)
15250 && parts.base == stack_pointer_rtx)
15251 {
15252 /* Since call will adjust stack by -UNITS_PER_WORD,
15253 we must convert "disp(stack, index, scale)" to
15254 "disp+UNITS_PER_WORD(stack, index, scale)". */
15255 if (parts.index)
15256 {
15257 addr = gen_rtx_MULT (Pmode, parts.index,
15258 GEN_INT (parts.scale));
15259 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15260 addr);
15261 }
15262 else
15263 addr = stack_pointer_rtx;
15264
15265 rtx disp;
15266 if (parts.disp != NULL_RTX)
15267 disp = plus_constant (Pmode, parts.disp,
15268 UNITS_PER_WORD);
15269 else
15270 disp = GEN_INT (UNITS_PER_WORD);
15271
15272 addr = gen_rtx_PLUS (Pmode, addr, disp);
15273 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
15274 }
15275 }
15276
15277 output_asm_insn (push_buf, &call_op);
15278
15279 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15280
15281 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15282
15283 /* Call. */
15284 fputs ("\tcall\t", asm_out_file);
15285 assemble_name_raw (asm_out_file, indirectlabel1);
15286 fputc ('\n', asm_out_file);
15287 }
15288 }
15289
15290 /* Output indirect branch via a call and return thunk. CALL_OP is
15291 the branch target. XASM is the assembly template for CALL_OP.
15292 Branch is a tail call if SIBCALL_P is true. */
15293
15294 static void
15295 ix86_output_indirect_branch (rtx call_op, const char *xasm,
15296 bool sibcall_p)
15297 {
15298 if (REG_P (call_op))
15299 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
15300 else
15301 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
15302 }
15303
15304 /* Output indirect jump. CALL_OP is the jump target. */
15305
15306 const char *
15307 ix86_output_indirect_jmp (rtx call_op)
15308 {
15309 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
15310 {
15311 /* We can't have a red zone since "call" in the indirect thunk
15312 pushes the return address onto the stack, destroying the red zone. */
15313 if (ix86_red_zone_size != 0)
15314 gcc_unreachable ();
15315
15316 ix86_output_indirect_branch (call_op, "%0", true);
15317 return "";
15318 }
15319 else
15320 return "%!jmp\t%A0";
15321 }
15322
15323 /* Output return instrumentation for current function if needed. */
15324
15325 static void
15326 output_return_instrumentation (void)
15327 {
15328 if (ix86_instrument_return != instrument_return_none
15329 && flag_fentry
15330 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
15331 {
15332 if (ix86_flag_record_return)
15333 fprintf (asm_out_file, "1:\n");
15334 switch (ix86_instrument_return)
15335 {
15336 case instrument_return_call:
15337 fprintf (asm_out_file, "\tcall\t__return__\n");
15338 break;
15339 case instrument_return_nop5:
15340 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15341 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15342 break;
15343 case instrument_return_none:
15344 break;
15345 }
15346
15347 if (ix86_flag_record_return)
15348 {
15349 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
15350 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
15351 fprintf (asm_out_file, "\t.previous\n");
15352 }
15353 }
15354 }
15355
15356 /* Output the function return. Add a REP prefix to RET if LONG_P is
15357 true and the function return is kept. */
15358
15359 const char *
15360 ix86_output_function_return (bool long_p)
15361 {
15362 output_return_instrumentation ();
15363
15364 if (cfun->machine->function_return_type != indirect_branch_keep)
15365 {
15366 char thunk_name[32];
15367 enum indirect_thunk_prefix need_prefix
15368 = indirect_thunk_need_prefix (current_output_insn);
15369
15370 if (cfun->machine->function_return_type
15371 != indirect_branch_thunk_inline)
15372 {
15373 bool need_thunk = (cfun->machine->function_return_type
15374 == indirect_branch_thunk);
15375 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
15376 true);
15377 indirect_return_needed |= need_thunk;
15378 fprintf (asm_out_file, "\tjmp\t");
15379 assemble_name (asm_out_file, thunk_name);
15380 putc ('\n', asm_out_file);
15381 }
15382 else
15383 output_indirect_thunk (INVALID_REGNUM);
15384
15385 return "";
15386 }
15387
15388 if (!long_p)
15389 return "%!ret";
15390
15391 return "rep%; ret";
15392 }
15393
15394 /* Output indirect function return. RET_OP is the function return
15395 target. */
15396
15397 const char *
15398 ix86_output_indirect_function_return (rtx ret_op)
15399 {
15400 if (cfun->machine->function_return_type != indirect_branch_keep)
15401 {
15402 char thunk_name[32];
15403 enum indirect_thunk_prefix need_prefix
15404 = indirect_thunk_need_prefix (current_output_insn);
15405 unsigned int regno = REGNO (ret_op);
15406 gcc_assert (regno == CX_REG);
15407
15408 if (cfun->machine->function_return_type
15409 != indirect_branch_thunk_inline)
15410 {
15411 bool need_thunk = (cfun->machine->function_return_type
15412 == indirect_branch_thunk);
15413 indirect_thunk_name (thunk_name, regno, need_prefix, true);
15414
15415 if (need_thunk)
15416 {
15417 indirect_return_via_cx = true;
15418 indirect_thunks_used |= 1 << CX_REG;
15419 }
15420 fprintf (asm_out_file, "\tjmp\t");
15421 assemble_name (asm_out_file, thunk_name);
15422 putc ('\n', asm_out_file);
15423 }
15424 else
15425 output_indirect_thunk (regno);
15426
15427 return "";
15428 }
15429 else
15430 return "%!jmp\t%A0";
15431 }
15432
15433 /* Output the assembly for a call instruction. */
15434
15435 const char *
15436 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
15437 {
15438 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
15439 bool output_indirect_p
15440 = (!TARGET_SEH
15441 && cfun->machine->indirect_branch_type != indirect_branch_keep);
15442 bool seh_nop_p = false;
15443 const char *xasm;
15444
15445 if (SIBLING_CALL_P (insn))
15446 {
15447 output_return_instrumentation ();
15448 if (direct_p)
15449 {
15450 if (ix86_nopic_noplt_attribute_p (call_op))
15451 {
15452 direct_p = false;
15453 if (TARGET_64BIT)
15454 {
15455 if (output_indirect_p)
15456 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15457 else
15458 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15459 }
15460 else
15461 {
15462 if (output_indirect_p)
15463 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15464 else
15465 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15466 }
15467 }
15468 else
15469 xasm = "%!jmp\t%P0";
15470 }
15471 /* SEH epilogue detection requires the indirect branch case
15472 to include REX.W. */
15473 else if (TARGET_SEH)
15474 xasm = "%!rex.W jmp\t%A0";
15475 else
15476 {
15477 if (output_indirect_p)
15478 xasm = "%0";
15479 else
15480 xasm = "%!jmp\t%A0";
15481 }
15482
15483 if (output_indirect_p && !direct_p)
15484 ix86_output_indirect_branch (call_op, xasm, true);
15485 else
15486 output_asm_insn (xasm, &call_op);
15487 return "";
15488 }
15489
15490 /* SEH unwinding can require an extra nop to be emitted in several
15491 circumstances. Determine if we have one of those. */
15492 if (TARGET_SEH)
15493 {
15494 rtx_insn *i;
15495
15496 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
15497 {
15498 /* Prevent a catch region from being adjacent to a jump that would
15499 be interpreted as an epilogue sequence by the unwinder. */
15500 if (JUMP_P(i) && CROSSING_JUMP_P (i))
15501 {
15502 seh_nop_p = true;
15503 break;
15504 }
15505
15506 /* If we get to another real insn, we don't need the nop. */
15507 if (INSN_P (i))
15508 break;
15509
15510 /* If we get to the epilogue note, prevent a catch region from
15511 being adjacent to the standard epilogue sequence. With non-call
15512 exceptions, we'll have done this during epilogue emission. */
15513 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
15514 && !flag_non_call_exceptions
15515 && !can_throw_internal (insn))
15516 {
15517 seh_nop_p = true;
15518 break;
15519 }
15520 }
15521
15522 /* If we didn't find a real insn following the call, prevent the
15523 unwinder from looking into the next function. */
15524 if (i == NULL)
15525 seh_nop_p = true;
15526 }
15527
15528 if (direct_p)
15529 {
15530 if (ix86_nopic_noplt_attribute_p (call_op))
15531 {
15532 direct_p = false;
15533 if (TARGET_64BIT)
15534 {
15535 if (output_indirect_p)
15536 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15537 else
15538 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15539 }
15540 else
15541 {
15542 if (output_indirect_p)
15543 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15544 else
15545 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15546 }
15547 }
15548 else
15549 xasm = "%!call\t%P0";
15550 }
15551 else
15552 {
15553 if (output_indirect_p)
15554 xasm = "%0";
15555 else
15556 xasm = "%!call\t%A0";
15557 }
15558
15559 if (output_indirect_p && !direct_p)
15560 ix86_output_indirect_branch (call_op, xasm, false);
15561 else
15562 output_asm_insn (xasm, &call_op);
15563
15564 if (seh_nop_p)
15565 return "nop";
15566
15567 return "";
15568 }
15569 \f
15570 /* Return a MEM corresponding to a stack slot with mode MODE.
15571 Allocate a new slot if necessary.
15572
15573 The RTL for a function can have several slots available: N is
15574 which slot to use. */
15575
15576 rtx
15577 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
15578 {
15579 struct stack_local_entry *s;
15580
15581 gcc_assert (n < MAX_386_STACK_LOCALS);
15582
15583 for (s = ix86_stack_locals; s; s = s->next)
15584 if (s->mode == mode && s->n == n)
15585 return validize_mem (copy_rtx (s->rtl));
15586
15587 s = ggc_alloc<stack_local_entry> ();
15588 s->n = n;
15589 s->mode = mode;
15590 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15591
15592 s->next = ix86_stack_locals;
15593 ix86_stack_locals = s;
15594 return validize_mem (copy_rtx (s->rtl));
15595 }
15596
15597 static void
15598 ix86_instantiate_decls (void)
15599 {
15600 struct stack_local_entry *s;
15601
15602 for (s = ix86_stack_locals; s; s = s->next)
15603 if (s->rtl != NULL_RTX)
15604 instantiate_decl_rtl (s->rtl);
15605 }
15606 \f
15607 /* Check whether x86 address PARTS is a pc-relative address. */
15608
15609 bool
15610 ix86_rip_relative_addr_p (struct ix86_address *parts)
15611 {
15612 rtx base, index, disp;
15613
15614 base = parts->base;
15615 index = parts->index;
15616 disp = parts->disp;
15617
15618 if (disp && !base && !index)
15619 {
15620 if (TARGET_64BIT)
15621 {
15622 rtx symbol = disp;
15623
15624 if (GET_CODE (disp) == CONST)
15625 symbol = XEXP (disp, 0);
15626 if (GET_CODE (symbol) == PLUS
15627 && CONST_INT_P (XEXP (symbol, 1)))
15628 symbol = XEXP (symbol, 0);
15629
15630 if (GET_CODE (symbol) == LABEL_REF
15631 || (GET_CODE (symbol) == SYMBOL_REF
15632 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
15633 || (GET_CODE (symbol) == UNSPEC
15634 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
15635 || XINT (symbol, 1) == UNSPEC_PCREL
15636 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
15637 return true;
15638 }
15639 }
15640 return false;
15641 }
15642
15643 /* Calculate the length of the memory address in the instruction encoding.
15644 This includes the addr32 prefix but not the one-byte modrm, opcode,
15645 or other prefixes. We never generate the addr32 prefix for an LEA insn. */
15646
15647 int
15648 memory_address_length (rtx addr, bool lea)
15649 {
15650 struct ix86_address parts;
15651 rtx base, index, disp;
15652 int len;
15653 int ok;
15654
15655 if (GET_CODE (addr) == PRE_DEC
15656 || GET_CODE (addr) == POST_INC
15657 || GET_CODE (addr) == PRE_MODIFY
15658 || GET_CODE (addr) == POST_MODIFY)
15659 return 0;
15660
15661 ok = ix86_decompose_address (addr, &parts);
15662 gcc_assert (ok);
15663
15664 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
15665
15666 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
15667 if (TARGET_64BIT && !lea
15668 && (SImode_address_operand (addr, VOIDmode)
15669 || (parts.base && GET_MODE (parts.base) == SImode)
15670 || (parts.index && GET_MODE (parts.index) == SImode)))
15671 len++;
15672
15673 base = parts.base;
15674 index = parts.index;
15675 disp = parts.disp;
15676
15677 if (base && SUBREG_P (base))
15678 base = SUBREG_REG (base);
15679 if (index && SUBREG_P (index))
15680 index = SUBREG_REG (index);
15681
15682 gcc_assert (base == NULL_RTX || REG_P (base));
15683 gcc_assert (index == NULL_RTX || REG_P (index));
15684
15685 /* Rule of thumb:
15686 - esp as the base always wants an index,
15687 - ebp as the base always wants a displacement,
15688 - r12 as the base always wants an index,
15689 - r13 as the base always wants a displacement. */
15690
15691 /* Register Indirect. */
15692 if (base && !index && !disp)
15693 {
15694 /* esp (for its index) and ebp (for its displacement) need
15695 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
15696 code. */
15697 if (base == arg_pointer_rtx
15698 || base == frame_pointer_rtx
15699 || REGNO (base) == SP_REG
15700 || REGNO (base) == BP_REG
15701 || REGNO (base) == R12_REG
15702 || REGNO (base) == R13_REG)
15703 len++;
15704 }
15705
15706 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
15707 is not disp32, but disp32(%rip), so for disp32 a
15708 SIB byte is needed, unless print_operand_address
15709 optimizes it into disp32(%rip) or (%rip) is implied
15710 by UNSPEC. */
15711 else if (disp && !base && !index)
15712 {
15713 len += 4;
15714 if (!ix86_rip_relative_addr_p (&parts))
15715 len++;
15716 }
15717 else
15718 {
15719 /* Find the length of the displacement constant. */
15720 if (disp)
15721 {
15722 if (base && satisfies_constraint_K (disp))
15723 len += 1;
15724 else
15725 len += 4;
15726 }
15727 /* ebp always wants a displacement. Similarly r13. */
15728 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
15729 len++;
15730
15731 /* An index requires the two-byte modrm form.... */
15732 if (index
15733 /* ...like esp (or r12), which always wants an index. */
15734 || base == arg_pointer_rtx
15735 || base == frame_pointer_rtx
15736 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
15737 len++;
15738 }
15739
15740 return len;
15741 }
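/* A few worked examples of the returned length (a sketch; the counts are in
   addition to the opcode and the one-byte modrm):

     (%rbx)            -> 0   plain register indirect
     8(%rsp)           -> 2   disp8 plus the SIB byte that esp/r12 require
     8(%rbx,%rcx,4)    -> 2   disp8 plus the SIB byte for the index
     sym(%rip)         -> 4   disp32 only; %rip-relative, so no SIB byte  */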
15742
15743 /* Compute the default value for the "length_immediate" attribute. When
15744 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
15745 int
15746 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
15747 {
15748 int len = 0;
15749 int i;
15750 extract_insn_cached (insn);
15751 for (i = recog_data.n_operands - 1; i >= 0; --i)
15752 if (CONSTANT_P (recog_data.operand[i]))
15753 {
15754 enum attr_mode mode = get_attr_mode (insn);
15755
15756 gcc_assert (!len);
15757 if (shortform && CONST_INT_P (recog_data.operand[i]))
15758 {
15759 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
15760 switch (mode)
15761 {
15762 case MODE_QI:
15763 len = 1;
15764 continue;
15765 case MODE_HI:
15766 ival = trunc_int_for_mode (ival, HImode);
15767 break;
15768 case MODE_SI:
15769 ival = trunc_int_for_mode (ival, SImode);
15770 break;
15771 default:
15772 break;
15773 }
15774 if (IN_RANGE (ival, -128, 127))
15775 {
15776 len = 1;
15777 continue;
15778 }
15779 }
15780 switch (mode)
15781 {
15782 case MODE_QI:
15783 len = 1;
15784 break;
15785 case MODE_HI:
15786 len = 2;
15787 break;
15788 case MODE_SI:
15789 len = 4;
15790 break;
15791 /* Immediates for DImode instructions are encoded
15792 as 32-bit sign-extended values. */
15793 case MODE_DI:
15794 len = 4;
15795 break;
15796 default:
15797 fatal_insn ("unknown insn mode", insn);
15798 }
15799 }
15800 return len;
15801 }
15802
15803 /* Compute default value for "length_address" attribute. */
15804 int
15805 ix86_attr_length_address_default (rtx_insn *insn)
15806 {
15807 int i;
15808
15809 if (get_attr_type (insn) == TYPE_LEA)
15810 {
15811 rtx set = PATTERN (insn), addr;
15812
15813 if (GET_CODE (set) == PARALLEL)
15814 set = XVECEXP (set, 0, 0);
15815
15816 gcc_assert (GET_CODE (set) == SET);
15817
15818 addr = SET_SRC (set);
15819
15820 return memory_address_length (addr, true);
15821 }
15822
15823 extract_insn_cached (insn);
15824 for (i = recog_data.n_operands - 1; i >= 0; --i)
15825 {
15826 rtx op = recog_data.operand[i];
15827 if (MEM_P (op))
15828 {
15829 constrain_operands_cached (insn, reload_completed);
15830 if (which_alternative != -1)
15831 {
15832 const char *constraints = recog_data.constraints[i];
15833 int alt = which_alternative;
15834
15835 while (*constraints == '=' || *constraints == '+')
15836 constraints++;
15837 while (alt-- > 0)
15838 while (*constraints++ != ',')
15839 ;
15840 /* Skip ignored operands. */
15841 if (*constraints == 'X')
15842 continue;
15843 }
15844
15845 int len = memory_address_length (XEXP (op, 0), false);
15846
15847 /* Account for segment prefix for non-default addr spaces. */
15848 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
15849 len++;
15850
15851 return len;
15852 }
15853 }
15854 return 0;
15855 }
15856
15857 /* Compute the default value for the "length_vex" attribute. It includes
15858 the 2- or 3-byte VEX prefix and 1 opcode byte. */
15859
15860 int
15861 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
15862 bool has_vex_w)
15863 {
15864 int i;
15865
15866 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
15867 the 3-byte VEX prefix. */
15868 if (!has_0f_opcode || has_vex_w)
15869 return 3 + 1;
15870
15871 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
15872 if (!TARGET_64BIT)
15873 return 2 + 1;
15874
15875 extract_insn_cached (insn);
15876
15877 for (i = recog_data.n_operands - 1; i >= 0; --i)
15878 if (REG_P (recog_data.operand[i]))
15879 {
15880 /* REX.W bit uses 3 byte VEX prefix. */
15881 if (GET_MODE (recog_data.operand[i]) == DImode
15882 && GENERAL_REG_P (recog_data.operand[i]))
15883 return 3 + 1;
15884 }
15885 else
15886 {
15887 /* REX.X or REX.B bits use 3 byte VEX prefix. */
15888 if (MEM_P (recog_data.operand[i])
15889 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
15890 return 3 + 1;
15891 }
15892
15893 return 2 + 1;
15894 }
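/* For instance (a sketch of the values returned above): an AVX insn in the
   0f opcode map whose operands use only xmm0-xmm7 and legacy registers gets
   2 (VEX prefix) + 1 (opcode) = 3, while an insn that needs VEX.W, a non-0f
   opcode map, a DImode general register, or an extended register in a memory
   operand gets 3 + 1 = 4.  */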
15895 \f
15896
15897 static bool
15898 ix86_class_likely_spilled_p (reg_class_t);
15899
15900 /* Return true if the lhs of INSN is a HW function argument register; set
15901 *IS_SPILLED to true if it is a likely-spilled HW register. */
15902 static bool
15903 insn_is_function_arg (rtx insn, bool* is_spilled)
15904 {
15905 rtx dst;
15906
15907 if (!NONDEBUG_INSN_P (insn))
15908 return false;
15909 /* Call instructions are not movable, ignore them. */
15910 if (CALL_P (insn))
15911 return false;
15912 insn = PATTERN (insn);
15913 if (GET_CODE (insn) == PARALLEL)
15914 insn = XVECEXP (insn, 0, 0);
15915 if (GET_CODE (insn) != SET)
15916 return false;
15917 dst = SET_DEST (insn);
15918 if (REG_P (dst) && HARD_REGISTER_P (dst)
15919 && ix86_function_arg_regno_p (REGNO (dst)))
15920 {
15921 /* Is it a likely-spilled HW register? */
15922 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
15923 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
15924 *is_spilled = true;
15925 return true;
15926 }
15927 return false;
15928 }
15929
15930 /* Add output dependencies for a chain of adjacent function arguments, but
15931 only if there is a move to a likely-spilled HW register. Return the first
15932 argument if at least one dependence was added, or NULL otherwise. */
15933 static rtx_insn *
15934 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
15935 {
15936 rtx_insn *insn;
15937 rtx_insn *last = call;
15938 rtx_insn *first_arg = NULL;
15939 bool is_spilled = false;
15940
15941 head = PREV_INSN (head);
15942
15943 /* Find the argument-passing instruction nearest to the call. */
15944 while (true)
15945 {
15946 last = PREV_INSN (last);
15947 if (last == head)
15948 return NULL;
15949 if (!NONDEBUG_INSN_P (last))
15950 continue;
15951 if (insn_is_function_arg (last, &is_spilled))
15952 break;
15953 return NULL;
15954 }
15955
15956 first_arg = last;
15957 while (true)
15958 {
15959 insn = PREV_INSN (last);
15960 if (!INSN_P (insn))
15961 break;
15962 if (insn == head)
15963 break;
15964 if (!NONDEBUG_INSN_P (insn))
15965 {
15966 last = insn;
15967 continue;
15968 }
15969 if (insn_is_function_arg (insn, &is_spilled))
15970 {
15971 /* Add an output dependence between two function arguments if the chain
15972 of output arguments contains likely-spilled HW registers. */
15973 if (is_spilled)
15974 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
15975 first_arg = last = insn;
15976 }
15977 else
15978 break;
15979 }
15980 if (!is_spilled)
15981 return NULL;
15982 return first_arg;
15983 }
15984
15985 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
15986 code motion. */
15987 static void
15988 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
15989 {
15990 rtx set;
15991 rtx tmp;
15992
15993 set = single_set (insn);
15994 if (!set)
15995 return;
15996 tmp = SET_DEST (set);
15997 if (REG_P (tmp))
15998 {
15999 /* Add output dependency to the first function argument. */
16000 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16001 return;
16002 }
16003 /* Add anti dependency. */
16004 add_dependence (first_arg, insn, REG_DEP_ANTI);
16005 }
16006
16007 /* Avoid cross-block motion of a function argument by adding a dependency
16008 from the first non-jump instruction in BB. */
16009 static void
16010 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16011 {
16012 rtx_insn *insn = BB_END (bb);
16013
16014 while (insn)
16015 {
16016 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
16017 {
16018 rtx set = single_set (insn);
16019 if (set)
16020 {
16021 avoid_func_arg_motion (arg, insn);
16022 return;
16023 }
16024 }
16025 if (insn == BB_HEAD (bb))
16026 return;
16027 insn = PREV_INSN (insn);
16028 }
16029 }
16030
16031 /* Hook for the pre-reload scheduler: avoid motion of function arguments
16032 passed in likely-spilled HW registers. */
16033 static void
16034 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
16035 {
16036 rtx_insn *insn;
16037 rtx_insn *first_arg = NULL;
16038 if (reload_completed)
16039 return;
16040 while (head != tail && DEBUG_INSN_P (head))
16041 head = NEXT_INSN (head);
16042 for (insn = tail; insn != head; insn = PREV_INSN (insn))
16043 if (INSN_P (insn) && CALL_P (insn))
16044 {
16045 first_arg = add_parameter_dependencies (insn, head);
16046 if (first_arg)
16047 {
16048 /* Add a dependee for the first argument to predecessors, but only
16049 if the region contains more than one block. */
16050 basic_block bb = BLOCK_FOR_INSN (insn);
16051 int rgn = CONTAINING_RGN (bb->index);
16052 int nr_blks = RGN_NR_BLOCKS (rgn);
16053 /* Skip trivial regions and region head blocks that can have
16054 predecessors outside of the region. */
16055 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
16056 {
16057 edge e;
16058 edge_iterator ei;
16059
16060 /* Regions are SCCs with the exception of selective
16061 scheduling with pipelining of outer blocks enabled.
16062 So also check that immediate predecessors of a non-head
16063 block are in the same region. */
16064 FOR_EACH_EDGE (e, ei, bb->preds)
16065 {
16066 /* Avoid creating loop-carried dependencies by using
16067 the topological ordering in the region. */
16068 if (rgn == CONTAINING_RGN (e->src->index)
16069 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
16070 add_dependee_for_func_arg (first_arg, e->src);
16071 }
16072 }
16073 insn = first_arg;
16074 if (insn == head)
16075 break;
16076 }
16077 }
16078 else if (first_arg)
16079 avoid_func_arg_motion (first_arg, insn);
16080 }
16081
16082 /* Hook for the pre-reload scheduler: set the priority of moves from
16083 likely-spilled HW registers to the maximum, to schedule them as soon as
16084 possible. These are moves from function argument registers at the top of
16085 the function entry and moves from function return value registers after a call. */
16086 static int
16087 ix86_adjust_priority (rtx_insn *insn, int priority)
16088 {
16089 rtx set;
16090
16091 if (reload_completed)
16092 return priority;
16093
16094 if (!NONDEBUG_INSN_P (insn))
16095 return priority;
16096
16097 set = single_set (insn);
16098 if (set)
16099 {
16100 rtx tmp = SET_SRC (set);
16101 if (REG_P (tmp)
16102 && HARD_REGISTER_P (tmp)
16103 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
16104 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
16105 return current_sched_info->sched_max_insns_priority;
16106 }
16107
16108 return priority;
16109 }
16110
16111 /* Prepare for scheduling pass. */
16112 static void
16113 ix86_sched_init_global (FILE *, int, int)
16114 {
16115 /* Install scheduling hooks for current CPU. Some of these hooks are used
16116 in time-critical parts of the scheduler, so we only set them up when
16117 they are actually used. */
16118 switch (ix86_tune)
16119 {
16120 case PROCESSOR_CORE2:
16121 case PROCESSOR_NEHALEM:
16122 case PROCESSOR_SANDYBRIDGE:
16123 case PROCESSOR_HASWELL:
16124 case PROCESSOR_GENERIC:
16125 /* Do not perform multipass scheduling for pre-reload schedule
16126 to save compile time. */
16127 if (reload_completed)
16128 {
16129 ix86_core2i7_init_hooks ();
16130 break;
16131 }
16132 /* Fall through. */
16133 default:
16134 targetm.sched.dfa_post_advance_cycle = NULL;
16135 targetm.sched.first_cycle_multipass_init = NULL;
16136 targetm.sched.first_cycle_multipass_begin = NULL;
16137 targetm.sched.first_cycle_multipass_issue = NULL;
16138 targetm.sched.first_cycle_multipass_backtrack = NULL;
16139 targetm.sched.first_cycle_multipass_end = NULL;
16140 targetm.sched.first_cycle_multipass_fini = NULL;
16141 break;
16142 }
16143 }
16144
16145 \f
16146 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16147
16148 static HOST_WIDE_INT
16149 ix86_static_rtx_alignment (machine_mode mode)
16150 {
16151 if (mode == DFmode)
16152 return 64;
16153 if (ALIGN_MODE_128 (mode))
16154 return MAX (128, GET_MODE_ALIGNMENT (mode));
16155 return GET_MODE_ALIGNMENT (mode);
16156 }
16157
16158 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16159
16160 static HOST_WIDE_INT
16161 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
16162 {
16163 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16164 || TREE_CODE (exp) == INTEGER_CST)
16165 {
16166 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
16167 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
16168 return MAX (mode_align, align);
16169 }
16170 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16171 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16172 return BITS_PER_WORD;
16173
16174 return align;
16175 }
16176
16177 /* Implement TARGET_EMPTY_RECORD_P. */
16178
16179 static bool
16180 ix86_is_empty_record (const_tree type)
16181 {
16182 if (!TARGET_64BIT)
16183 return false;
16184 return default_is_empty_record (type);
16185 }
16186
16187 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16188
16189 static void
16190 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
16191 {
16192 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16193
16194 if (!cum->warn_empty)
16195 return;
16196
16197 if (!TYPE_EMPTY_P (type))
16198 return;
16199
16200 /* Don't warn if the function isn't visible outside of the TU. */
16201 if (cum->decl && !TREE_PUBLIC (cum->decl))
16202 return;
16203
16204 const_tree ctx = get_ultimate_context (cum->decl);
16205 if (ctx != NULL_TREE
16206 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
16207 return;
16208
16209 /* If the actual size of the type is zero, then there is no change
16210 in how objects of this size are passed. */
16211 if (int_size_in_bytes (type) == 0)
16212 return;
16213
16214 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
16215 "changes in %<-fabi-version=12%> (GCC 8)", type);
16216
16217 /* Only warn once. */
16218 cum->warn_empty = false;
16219 }
16220
16221 /* This hook returns name of multilib ABI. */
16222
16223 static const char *
16224 ix86_get_multilib_abi_name (void)
16225 {
16226 if (!(TARGET_64BIT_P (ix86_isa_flags)))
16227 return "i386";
16228 else if (TARGET_X32_P (ix86_isa_flags))
16229 return "x32";
16230 else
16231 return "x86_64";
16232 }
16233
16234 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16235 the data type, and ALIGN is the alignment that the object would
16236 ordinarily have. */
16237
16238 static int
16239 iamcu_alignment (tree type, int align)
16240 {
16241 machine_mode mode;
16242
16243 if (align < 32 || TYPE_USER_ALIGN (type))
16244 return align;
16245
16246 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
16247 aligned to 4 bytes. */
16248 mode = TYPE_MODE (strip_array_types (type));
16249 switch (GET_MODE_CLASS (mode))
16250 {
16251 case MODE_INT:
16252 case MODE_COMPLEX_INT:
16253 case MODE_COMPLEX_FLOAT:
16254 case MODE_FLOAT:
16255 case MODE_DECIMAL_FLOAT:
16256 return 32;
16257 default:
16258 return align;
16259 }
16260 }
16261
16262 /* Compute the alignment for a static variable.
16263 TYPE is the data type, and ALIGN is the alignment that
16264 the object would ordinarily have. The value of this function is used
16265 instead of that alignment to align the object. */
16266
16267 int
16268 ix86_data_alignment (tree type, unsigned int align, bool opt)
16269 {
16270 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16271 for symbols from other compilation units or symbols that don't need
16272 to bind locally. In order to preserve some ABI compatibility with
16273 those compilers, ensure we don't decrease alignment from what we
16274 used to assume. */
16275
16276 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
16277
16278 /* A data structure equal to or greater than the size of a cache line
16279 (64 bytes in the Pentium 4 and other recent Intel processors, including
16280 processors based on the Intel Core microarchitecture) should be aligned
16281 so that its base address is a multiple of the cache line size. */
16282
16283 unsigned int max_align
16284 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
16285
16286 if (max_align < BITS_PER_WORD)
16287 max_align = BITS_PER_WORD;
16288
16289 switch (ix86_align_data_type)
16290 {
16291 case ix86_align_data_type_abi: opt = false; break;
16292 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
16293 case ix86_align_data_type_cacheline: break;
16294 }
16295
16296 if (TARGET_IAMCU)
16297 align = iamcu_alignment (type, align);
16298
16299 if (opt
16300 && AGGREGATE_TYPE_P (type)
16301 && TYPE_SIZE (type)
16302 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
16303 {
16304 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
16305 && align < max_align_compat)
16306 align = max_align_compat;
16307 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
16308 && align < max_align)
16309 align = max_align;
16310 }
16311
16312 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
16313 to a 16-byte boundary. */
16314 if (TARGET_64BIT)
16315 {
16316 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
16317 && TYPE_SIZE (type)
16318 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16319 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16320 && align < 128)
16321 return 128;
16322 }
16323
16324 if (!opt)
16325 return align;
16326
16327 if (TREE_CODE (type) == ARRAY_TYPE)
16328 {
16329 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16330 return 64;
16331 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16332 return 128;
16333 }
16334 else if (TREE_CODE (type) == COMPLEX_TYPE)
16335 {
16336
16337 if (TYPE_MODE (type) == DCmode && align < 64)
16338 return 64;
16339 if ((TYPE_MODE (type) == XCmode
16340 || TYPE_MODE (type) == TCmode) && align < 128)
16341 return 128;
16342 }
16343 else if ((TREE_CODE (type) == RECORD_TYPE
16344 || TREE_CODE (type) == UNION_TYPE
16345 || TREE_CODE (type) == QUAL_UNION_TYPE)
16346 && TYPE_FIELDS (type))
16347 {
16348 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16349 return 64;
16350 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16351 return 128;
16352 }
16353 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16354 || TREE_CODE (type) == INTEGER_TYPE)
16355 {
16356 if (TYPE_MODE (type) == DFmode && align < 64)
16357 return 64;
16358 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16359 return 128;
16360 }
16361
16362 return align;
16363 }
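/* Worked example (a sketch, assuming OPT is true and the default 64-byte
   prefetch block): on x86-64 a static "char buf[40]" is at least 16 bytes,
   so it gets the 128-bit psABI array alignment, and being at least 32 bytes
   it is further bumped to the 256-bit GCC 4.8 compatibility alignment; an
   object of a full cache line (64 bytes) or more would additionally be
   aligned to the cache line.  */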
16364
16365 /* Compute the alignment for a local variable or a stack slot. EXP is
16366 the data type or decl itself, MODE is the widest mode available and
16367 ALIGN is the alignment that the object would ordinarily have. The
16368 value of this macro is used instead of that alignment to align the
16369 object. */
16370
16371 unsigned int
16372 ix86_local_alignment (tree exp, machine_mode mode,
16373 unsigned int align)
16374 {
16375 tree type, decl;
16376
16377 if (exp && DECL_P (exp))
16378 {
16379 type = TREE_TYPE (exp);
16380 decl = exp;
16381 }
16382 else
16383 {
16384 type = exp;
16385 decl = NULL;
16386 }
16387
16388 /* Don't do dynamic stack realignment for long long objects with
16389 -mpreferred-stack-boundary=2. */
16390 if (!TARGET_64BIT
16391 && align == 64
16392 && ix86_preferred_stack_boundary < 64
16393 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
16394 && (!type || !TYPE_USER_ALIGN (type))
16395 && (!decl || !DECL_USER_ALIGN (decl)))
16396 align = 32;
16397
16398 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
16399 register in MODE. We will return the largest alignment of XF
16400 and DF. */
16401 if (!type)
16402 {
16403 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
16404 align = GET_MODE_ALIGNMENT (DFmode);
16405 return align;
16406 }
16407
16408 /* Don't increase alignment for Intel MCU psABI. */
16409 if (TARGET_IAMCU)
16410 return align;
16411
16412 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
16413 to a 16-byte boundary. Exact wording is:
16414
16415 An array uses the same alignment as its elements, except that a local or
16416 global array variable of length at least 16 bytes or
16417 a C99 variable-length array variable always has alignment of at least 16 bytes.
16418
16419 This was added to allow use of aligned SSE instructions on arrays. This
16420 rule is meant for static storage (where the compiler cannot do the analysis
16421 by itself). We follow it for automatic variables only when convenient.
16422 We fully control everything in the function being compiled, and functions
16423 from other units cannot rely on the alignment.
16424
16425 Exclude the va_list type. It is the common case of a local array where
16426 we cannot benefit from the alignment.
16427
16428 TODO: Probably one should optimize for size only when var is not escaping. */
16429 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
16430 && TARGET_SSE)
16431 {
16432 if (AGGREGATE_TYPE_P (type)
16433 && (va_list_type_node == NULL_TREE
16434 || (TYPE_MAIN_VARIANT (type)
16435 != TYPE_MAIN_VARIANT (va_list_type_node)))
16436 && TYPE_SIZE (type)
16437 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16438 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16439 && align < 128)
16440 return 128;
16441 }
16442 if (TREE_CODE (type) == ARRAY_TYPE)
16443 {
16444 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16445 return 64;
16446 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16447 return 128;
16448 }
16449 else if (TREE_CODE (type) == COMPLEX_TYPE)
16450 {
16451 if (TYPE_MODE (type) == DCmode && align < 64)
16452 return 64;
16453 if ((TYPE_MODE (type) == XCmode
16454 || TYPE_MODE (type) == TCmode) && align < 128)
16455 return 128;
16456 }
16457 else if ((TREE_CODE (type) == RECORD_TYPE
16458 || TREE_CODE (type) == UNION_TYPE
16459 || TREE_CODE (type) == QUAL_UNION_TYPE)
16460 && TYPE_FIELDS (type))
16461 {
16462 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16463 return 64;
16464 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16465 return 128;
16466 }
16467 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16468 || TREE_CODE (type) == INTEGER_TYPE)
16469 {
16470
16471 if (TYPE_MODE (type) == DFmode && align < 64)
16472 return 64;
16473 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16474 return 128;
16475 }
16476 return align;
16477 }
16478
16479 /* Compute the minimum required alignment for dynamic stack realignment
16480 purposes for a local variable, parameter or a stack slot. EXP is
16481 the data type or decl itself, MODE is its mode and ALIGN is the
16482 alignment that the object would ordinarily have. */
16483
16484 unsigned int
16485 ix86_minimum_alignment (tree exp, machine_mode mode,
16486 unsigned int align)
16487 {
16488 tree type, decl;
16489
16490 if (exp && DECL_P (exp))
16491 {
16492 type = TREE_TYPE (exp);
16493 decl = exp;
16494 }
16495 else
16496 {
16497 type = exp;
16498 decl = NULL;
16499 }
16500
16501 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
16502 return align;
16503
16504 /* Don't do dynamic stack realignment for long long objects with
16505 -mpreferred-stack-boundary=2. */
16506 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
16507 && (!type || !TYPE_USER_ALIGN (type))
16508 && (!decl || !DECL_USER_ALIGN (decl)))
16509 {
16510 gcc_checking_assert (!TARGET_STV);
16511 return 32;
16512 }
16513
16514 return align;
16515 }
16516 \f
16517 /* Find a location for the static chain incoming to a nested function.
16518 This is a register, unless all free registers are used by arguments. */
16519
16520 static rtx
16521 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
16522 {
16523 unsigned regno;
16524
16525 if (TARGET_64BIT)
16526 {
16527 /* We always use R10 in 64-bit mode. */
16528 regno = R10_REG;
16529 }
16530 else
16531 {
16532 const_tree fntype, fndecl;
16533 unsigned int ccvt;
16534
16535 /* By default in 32-bit mode we use ECX to pass the static chain. */
16536 regno = CX_REG;
16537
16538 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
16539 {
16540 fntype = TREE_TYPE (fndecl_or_type);
16541 fndecl = fndecl_or_type;
16542 }
16543 else
16544 {
16545 fntype = fndecl_or_type;
16546 fndecl = NULL;
16547 }
16548
16549 ccvt = ix86_get_callcvt (fntype);
16550 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
16551 {
16552 /* Fastcall functions use ecx/edx for arguments, which leaves
16553 us with EAX for the static chain.
16554 Thiscall functions use ecx for arguments, which also
16555 leaves us with EAX for the static chain. */
16556 regno = AX_REG;
16557 }
16558 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
16559 {
16560 /* Thiscall functions use ecx for arguments, which leaves
16561 us with EAX and EDX for the static chain.
16562 We use EAX for ABI compatibility. */
16563 regno = AX_REG;
16564 }
16565 else if (ix86_function_regparm (fntype, fndecl) == 3)
16566 {
16567 /* For regparm 3, we have no free call-clobbered registers in
16568 which to store the static chain. In order to implement this,
16569 we have the trampoline push the static chain to the stack.
16570 However, we can't push a value below the return address when
16571 we call the nested function directly, so we have to use an
16572 alternate entry point. For this we use ESI, and have the
16573 alternate entry point push ESI, so that things appear the
16574 same once we're executing the nested function. */
16575 if (incoming_p)
16576 {
16577 if (fndecl == current_function_decl
16578 && !ix86_static_chain_on_stack)
16579 {
16580 gcc_assert (!reload_completed);
16581 ix86_static_chain_on_stack = true;
16582 }
16583 return gen_frame_mem (SImode,
16584 plus_constant (Pmode,
16585 arg_pointer_rtx, -8));
16586 }
16587 regno = SI_REG;
16588 }
16589 }
16590
16591 return gen_rtx_REG (Pmode, regno);
16592 }
16593
16594 /* Emit RTL insns to initialize the variable parts of a trampoline.
16595 FNDECL is the decl of the target address; M_TRAMP is a MEM for
16596 the trampoline, and CHAIN_VALUE is an RTX for the static chain
16597 to be passed to the target function. */
16598
16599 static void
16600 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
16601 {
16602 rtx mem, fnaddr;
16603 int opcode;
16604 int offset = 0;
16605 bool need_endbr = (flag_cf_protection & CF_BRANCH);
16606
16607 fnaddr = XEXP (DECL_RTL (fndecl), 0);
16608
16609 if (TARGET_64BIT)
16610 {
16611 int size;
16612
16613 if (need_endbr)
16614 {
16615 /* Insert ENDBR64. */
16616 mem = adjust_address (m_tramp, SImode, offset);
16617 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
16618 offset += 4;
16619 }
16620
16621 /* Load the function address into r11. Try to load the address using
16622 the shorter movl instead of movabs. We may want to support
16623 movq for kernel mode, but the kernel does not use trampolines at
16624 the moment. FNADDR is a 32-bit address and may not be in
16625 DImode when ptr_mode == SImode. Always use movl in this
16626 case. */
16627 if (ptr_mode == SImode
16628 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16629 {
16630 fnaddr = copy_addr_to_reg (fnaddr);
16631
16632 mem = adjust_address (m_tramp, HImode, offset);
16633 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
16634
16635 mem = adjust_address (m_tramp, SImode, offset + 2);
16636 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
16637 offset += 6;
16638 }
16639 else
16640 {
16641 mem = adjust_address (m_tramp, HImode, offset);
16642 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
16643
16644 mem = adjust_address (m_tramp, DImode, offset + 2);
16645 emit_move_insn (mem, fnaddr);
16646 offset += 10;
16647 }
16648
16649 /* Load the static chain into r10 using movabs. Use the shorter movl
16650 instead of movabs when ptr_mode == SImode. */
16651 if (ptr_mode == SImode)
16652 {
16653 opcode = 0xba41;
16654 size = 6;
16655 }
16656 else
16657 {
16658 opcode = 0xba49;
16659 size = 10;
16660 }
16661
16662 mem = adjust_address (m_tramp, HImode, offset);
16663 emit_move_insn (mem, gen_int_mode (opcode, HImode));
16664
16665 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
16666 emit_move_insn (mem, chain_value);
16667 offset += size;
16668
16669 /* Jump to r11; the last (unused) byte is a nop, only there to
16670 pad the write out to a single 32-bit store. */
16671 mem = adjust_address (m_tramp, SImode, offset);
16672 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
16673 offset += 4;
16674 }
16675 else
16676 {
16677 rtx disp, chain;
16678
16679 /* Depending on the static chain location, either load a register
16680 with a constant, or push the constant to the stack. All of the
16681 instructions are the same size. */
16682 chain = ix86_static_chain (fndecl, true);
16683 if (REG_P (chain))
16684 {
16685 switch (REGNO (chain))
16686 {
16687 case AX_REG:
16688 opcode = 0xb8; break;
16689 case CX_REG:
16690 opcode = 0xb9; break;
16691 default:
16692 gcc_unreachable ();
16693 }
16694 }
16695 else
16696 opcode = 0x68;
16697
16698 if (need_endbr)
16699 {
16700 /* Insert ENDBR32. */
16701 mem = adjust_address (m_tramp, SImode, offset);
16702 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
16703 offset += 4;
16704 }
16705
16706 mem = adjust_address (m_tramp, QImode, offset);
16707 emit_move_insn (mem, gen_int_mode (opcode, QImode));
16708
16709 mem = adjust_address (m_tramp, SImode, offset + 1);
16710 emit_move_insn (mem, chain_value);
16711 offset += 5;
16712
16713 mem = adjust_address (m_tramp, QImode, offset);
16714 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
16715
16716 mem = adjust_address (m_tramp, SImode, offset + 1);
16717
16718 /* Compute offset from the end of the jmp to the target function.
16719 In the case in which the trampoline stores the static chain on
16720 the stack, we need to skip the first insn which pushes the
16721 (call-saved) register static chain; this push is 1 byte. */
16722 offset += 5;
16723 disp = expand_binop (SImode, sub_optab, fnaddr,
16724 plus_constant (Pmode, XEXP (m_tramp, 0),
16725 offset - (MEM_P (chain) ? 1 : 0)),
16726 NULL_RTX, 1, OPTAB_DIRECT);
16727 emit_move_insn (mem, disp);
16728 }
16729
16730 gcc_assert (offset <= TRAMPOLINE_SIZE);
16731
16732 #ifdef HAVE_ENABLE_EXECUTE_STACK
16733 #ifdef CHECK_EXECUTE_STACK_ENABLED
16734 if (CHECK_EXECUTE_STACK_ENABLED)
16735 #endif
16736 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16737 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
16738 #endif
16739 }
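/* For reference, the 64-bit trampoline built above decodes to the following
   byte sequence (a sketch, taking the movabs variant and omitting ENDBR64):

     49 bb <imm64>    movabs $fnaddr, %r11
     49 ba <imm64>    movabs $chain_value, %r10
     49 ff e3         jmp    *%r11
     90               nop           (pads the final 32-bit store)

   with an optional leading "f3 0f 1e fa" (endbr64) when -fcf-protection
   requires it.  */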
16740
16741 static bool
16742 ix86_allocate_stack_slots_for_args (void)
16743 {
16744 /* Naked functions should not allocate stack slots for arguments. */
16745 return !ix86_function_naked (current_function_decl);
16746 }
16747
16748 static bool
16749 ix86_warn_func_return (tree decl)
16750 {
16751 /* Naked functions are implemented entirely in assembly, including the
16752 return sequence, so suppress warnings about this. */
16753 return !ix86_function_naked (decl);
16754 }
16755 \f
16756 /* Return the shift count of a vector-by-scalar shift builtin's second
16757 argument ARG1. */
16758 static tree
16759 ix86_vector_shift_count (tree arg1)
16760 {
16761 if (tree_fits_uhwi_p (arg1))
16762 return arg1;
16763 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
16764 {
16765 /* The count argument is weird: it is passed in as various 128-bit
16766 (or 64-bit) vectors, and the low 64 bits of it are the count. */
16767 unsigned char buf[16];
16768 int len = native_encode_expr (arg1, buf, 16);
16769 if (len == 0)
16770 return NULL_TREE;
16771 tree t = native_interpret_expr (uint64_type_node, buf, len);
16772 if (t && tree_fits_uhwi_p (t))
16773 return t;
16774 }
16775 return NULL_TREE;
16776 }
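/* For example (an illustrative call, not taken from this file), when folding
   _mm_sll_epi32 (v, _mm_cvtsi32_si128 (3)) with a constant count operand,
   the count reaches this function as a VECTOR_CST whose low 64 bits are 3,
   which is what gets returned.  */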
16777
16778 static tree
16779 ix86_fold_builtin (tree fndecl, int n_args,
16780 tree *args, bool ignore ATTRIBUTE_UNUSED)
16781 {
16782 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16783 {
16784 enum ix86_builtins fn_code
16785 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
16786 enum rtx_code rcode;
16787 bool is_vshift;
16788 unsigned HOST_WIDE_INT mask;
16789
16790 switch (fn_code)
16791 {
16792 case IX86_BUILTIN_CPU_IS:
16793 case IX86_BUILTIN_CPU_SUPPORTS:
16794 gcc_assert (n_args == 1);
16795 return fold_builtin_cpu (fndecl, args);
16796
16797 case IX86_BUILTIN_NANQ:
16798 case IX86_BUILTIN_NANSQ:
16799 {
16800 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16801 const char *str = c_getstr (*args);
16802 int quiet = fn_code == IX86_BUILTIN_NANQ;
16803 REAL_VALUE_TYPE real;
16804
16805 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16806 return build_real (type, real);
16807 return NULL_TREE;
16808 }
16809
16810 case IX86_BUILTIN_INFQ:
16811 case IX86_BUILTIN_HUGE_VALQ:
16812 {
16813 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16814 REAL_VALUE_TYPE inf;
16815 real_inf (&inf);
16816 return build_real (type, inf);
16817 }
16818
16819 case IX86_BUILTIN_TZCNT16:
16820 case IX86_BUILTIN_CTZS:
16821 case IX86_BUILTIN_TZCNT32:
16822 case IX86_BUILTIN_TZCNT64:
16823 gcc_assert (n_args == 1);
16824 if (TREE_CODE (args[0]) == INTEGER_CST)
16825 {
16826 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16827 tree arg = args[0];
16828 if (fn_code == IX86_BUILTIN_TZCNT16
16829 || fn_code == IX86_BUILTIN_CTZS)
16830 arg = fold_convert (short_unsigned_type_node, arg);
16831 if (integer_zerop (arg))
16832 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
16833 else
16834 return fold_const_call (CFN_CTZ, type, arg);
16835 }
16836 break;
16837
16838 case IX86_BUILTIN_LZCNT16:
16839 case IX86_BUILTIN_CLZS:
16840 case IX86_BUILTIN_LZCNT32:
16841 case IX86_BUILTIN_LZCNT64:
16842 gcc_assert (n_args == 1);
16843 if (TREE_CODE (args[0]) == INTEGER_CST)
16844 {
16845 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16846 tree arg = args[0];
16847 if (fn_code == IX86_BUILTIN_LZCNT16
16848 || fn_code == IX86_BUILTIN_CLZS)
16849 arg = fold_convert (short_unsigned_type_node, arg);
16850 if (integer_zerop (arg))
16851 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
16852 else
16853 return fold_const_call (CFN_CLZ, type, arg);
16854 }
16855 break;
16856
16857 case IX86_BUILTIN_BEXTR32:
16858 case IX86_BUILTIN_BEXTR64:
16859 case IX86_BUILTIN_BEXTRI32:
16860 case IX86_BUILTIN_BEXTRI64:
16861 gcc_assert (n_args == 2);
16862 if (tree_fits_uhwi_p (args[1]))
16863 {
16864 unsigned HOST_WIDE_INT res = 0;
16865 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
16866 unsigned int start = tree_to_uhwi (args[1]);
16867 unsigned int len = (start & 0xff00) >> 8;
16868 start &= 0xff;
16869 if (start >= prec || len == 0)
16870 res = 0;
16871 else if (!tree_fits_uhwi_p (args[0]))
16872 break;
16873 else
16874 res = tree_to_uhwi (args[0]) >> start;
16875 if (len > prec)
16876 len = prec;
16877 if (len < HOST_BITS_PER_WIDE_INT)
16878 res &= (HOST_WIDE_INT_1U << len) - 1;
16879 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
16880 }
16881 break;
16882
16883 case IX86_BUILTIN_BZHI32:
16884 case IX86_BUILTIN_BZHI64:
16885 gcc_assert (n_args == 2);
16886 if (tree_fits_uhwi_p (args[1]))
16887 {
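/* BZHI keeps bits [idx-1:0] of the first operand and zeroes the rest;
   the index comes from the low 8 bits of the second operand.  */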
16888 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
16889 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
16890 return args[0];
16891 if (idx == 0)
16892 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
16893 if (!tree_fits_uhwi_p (args[0]))
16894 break;
16895 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
16896 res &= ~(HOST_WIDE_INT_M1U << idx);
16897 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
16898 }
16899 break;
16900
16901 case IX86_BUILTIN_PDEP32:
16902 case IX86_BUILTIN_PDEP64:
16903 gcc_assert (n_args == 2);
16904 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
16905 {
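/* PDEP deposits the low bits of the first operand into the positions of
   the set bits in the mask, e.g. PDEP (0b101, 0b11010) == 0b10010.  */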
16906 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
16907 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
16908 unsigned HOST_WIDE_INT res = 0;
16909 unsigned HOST_WIDE_INT m, k = 1;
16910 for (m = 1; m; m <<= 1)
16911 if ((mask & m) != 0)
16912 {
16913 if ((src & k) != 0)
16914 res |= m;
16915 k <<= 1;
16916 }
16917 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
16918 }
16919 break;
16920
16921 case IX86_BUILTIN_PEXT32:
16922 case IX86_BUILTIN_PEXT64:
16923 gcc_assert (n_args == 2);
16924 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
16925 {
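/* PEXT is the inverse operation: it gathers the bits of the first operand
   selected by the mask into the low bits of the result,
   e.g. PEXT (0b10010, 0b11010) == 0b101.  */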
16926 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
16927 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
16928 unsigned HOST_WIDE_INT res = 0;
16929 unsigned HOST_WIDE_INT m, k = 1;
16930 for (m = 1; m; m <<= 1)
16931 if ((mask & m) != 0)
16932 {
16933 if ((src & m) != 0)
16934 res |= k;
16935 k <<= 1;
16936 }
16937 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
16938 }
16939 break;
16940
16941 case IX86_BUILTIN_MOVMSKPS:
16942 case IX86_BUILTIN_PMOVMSKB:
16943 case IX86_BUILTIN_MOVMSKPD:
16944 case IX86_BUILTIN_PMOVMSKB128:
16945 case IX86_BUILTIN_MOVMSKPD256:
16946 case IX86_BUILTIN_MOVMSKPS256:
16947 case IX86_BUILTIN_PMOVMSKB256:
16948 gcc_assert (n_args == 1);
16949 if (TREE_CODE (args[0]) == VECTOR_CST)
16950 {
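/* The movmsk family collects the sign bit of each element, with element
   i contributing bit i of the scalar result.  */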
16951 HOST_WIDE_INT res = 0;
16952 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
16953 {
16954 tree e = VECTOR_CST_ELT (args[0], i);
16955 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
16956 {
16957 if (wi::neg_p (wi::to_wide (e)))
16958 res |= HOST_WIDE_INT_1 << i;
16959 }
16960 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
16961 {
16962 if (TREE_REAL_CST (e).sign)
16963 res |= HOST_WIDE_INT_1 << i;
16964 }
16965 else
16966 return NULL_TREE;
16967 }
16968 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
16969 }
16970 break;
16971
16972 case IX86_BUILTIN_PSLLD:
16973 case IX86_BUILTIN_PSLLD128:
16974 case IX86_BUILTIN_PSLLD128_MASK:
16975 case IX86_BUILTIN_PSLLD256:
16976 case IX86_BUILTIN_PSLLD256_MASK:
16977 case IX86_BUILTIN_PSLLD512:
16978 case IX86_BUILTIN_PSLLDI:
16979 case IX86_BUILTIN_PSLLDI128:
16980 case IX86_BUILTIN_PSLLDI128_MASK:
16981 case IX86_BUILTIN_PSLLDI256:
16982 case IX86_BUILTIN_PSLLDI256_MASK:
16983 case IX86_BUILTIN_PSLLDI512:
16984 case IX86_BUILTIN_PSLLQ:
16985 case IX86_BUILTIN_PSLLQ128:
16986 case IX86_BUILTIN_PSLLQ128_MASK:
16987 case IX86_BUILTIN_PSLLQ256:
16988 case IX86_BUILTIN_PSLLQ256_MASK:
16989 case IX86_BUILTIN_PSLLQ512:
16990 case IX86_BUILTIN_PSLLQI:
16991 case IX86_BUILTIN_PSLLQI128:
16992 case IX86_BUILTIN_PSLLQI128_MASK:
16993 case IX86_BUILTIN_PSLLQI256:
16994 case IX86_BUILTIN_PSLLQI256_MASK:
16995 case IX86_BUILTIN_PSLLQI512:
16996 case IX86_BUILTIN_PSLLW:
16997 case IX86_BUILTIN_PSLLW128:
16998 case IX86_BUILTIN_PSLLW128_MASK:
16999 case IX86_BUILTIN_PSLLW256:
17000 case IX86_BUILTIN_PSLLW256_MASK:
17001 case IX86_BUILTIN_PSLLW512_MASK:
17002 case IX86_BUILTIN_PSLLWI:
17003 case IX86_BUILTIN_PSLLWI128:
17004 case IX86_BUILTIN_PSLLWI128_MASK:
17005 case IX86_BUILTIN_PSLLWI256:
17006 case IX86_BUILTIN_PSLLWI256_MASK:
17007 case IX86_BUILTIN_PSLLWI512_MASK:
17008 rcode = ASHIFT;
17009 is_vshift = false;
17010 goto do_shift;
17011 case IX86_BUILTIN_PSRAD:
17012 case IX86_BUILTIN_PSRAD128:
17013 case IX86_BUILTIN_PSRAD128_MASK:
17014 case IX86_BUILTIN_PSRAD256:
17015 case IX86_BUILTIN_PSRAD256_MASK:
17016 case IX86_BUILTIN_PSRAD512:
17017 case IX86_BUILTIN_PSRADI:
17018 case IX86_BUILTIN_PSRADI128:
17019 case IX86_BUILTIN_PSRADI128_MASK:
17020 case IX86_BUILTIN_PSRADI256:
17021 case IX86_BUILTIN_PSRADI256_MASK:
17022 case IX86_BUILTIN_PSRADI512:
17023 case IX86_BUILTIN_PSRAQ128_MASK:
17024 case IX86_BUILTIN_PSRAQ256_MASK:
17025 case IX86_BUILTIN_PSRAQ512:
17026 case IX86_BUILTIN_PSRAQI128_MASK:
17027 case IX86_BUILTIN_PSRAQI256_MASK:
17028 case IX86_BUILTIN_PSRAQI512:
17029 case IX86_BUILTIN_PSRAW:
17030 case IX86_BUILTIN_PSRAW128:
17031 case IX86_BUILTIN_PSRAW128_MASK:
17032 case IX86_BUILTIN_PSRAW256:
17033 case IX86_BUILTIN_PSRAW256_MASK:
17034 case IX86_BUILTIN_PSRAW512:
17035 case IX86_BUILTIN_PSRAWI:
17036 case IX86_BUILTIN_PSRAWI128:
17037 case IX86_BUILTIN_PSRAWI128_MASK:
17038 case IX86_BUILTIN_PSRAWI256:
17039 case IX86_BUILTIN_PSRAWI256_MASK:
17040 case IX86_BUILTIN_PSRAWI512:
17041 rcode = ASHIFTRT;
17042 is_vshift = false;
17043 goto do_shift;
17044 case IX86_BUILTIN_PSRLD:
17045 case IX86_BUILTIN_PSRLD128:
17046 case IX86_BUILTIN_PSRLD128_MASK:
17047 case IX86_BUILTIN_PSRLD256:
17048 case IX86_BUILTIN_PSRLD256_MASK:
17049 case IX86_BUILTIN_PSRLD512:
17050 case IX86_BUILTIN_PSRLDI:
17051 case IX86_BUILTIN_PSRLDI128:
17052 case IX86_BUILTIN_PSRLDI128_MASK:
17053 case IX86_BUILTIN_PSRLDI256:
17054 case IX86_BUILTIN_PSRLDI256_MASK:
17055 case IX86_BUILTIN_PSRLDI512:
17056 case IX86_BUILTIN_PSRLQ:
17057 case IX86_BUILTIN_PSRLQ128:
17058 case IX86_BUILTIN_PSRLQ128_MASK:
17059 case IX86_BUILTIN_PSRLQ256:
17060 case IX86_BUILTIN_PSRLQ256_MASK:
17061 case IX86_BUILTIN_PSRLQ512:
17062 case IX86_BUILTIN_PSRLQI:
17063 case IX86_BUILTIN_PSRLQI128:
17064 case IX86_BUILTIN_PSRLQI128_MASK:
17065 case IX86_BUILTIN_PSRLQI256:
17066 case IX86_BUILTIN_PSRLQI256_MASK:
17067 case IX86_BUILTIN_PSRLQI512:
17068 case IX86_BUILTIN_PSRLW:
17069 case IX86_BUILTIN_PSRLW128:
17070 case IX86_BUILTIN_PSRLW128_MASK:
17071 case IX86_BUILTIN_PSRLW256:
17072 case IX86_BUILTIN_PSRLW256_MASK:
17073 case IX86_BUILTIN_PSRLW512:
17074 case IX86_BUILTIN_PSRLWI:
17075 case IX86_BUILTIN_PSRLWI128:
17076 case IX86_BUILTIN_PSRLWI128_MASK:
17077 case IX86_BUILTIN_PSRLWI256:
17078 case IX86_BUILTIN_PSRLWI256_MASK:
17079 case IX86_BUILTIN_PSRLWI512:
17080 rcode = LSHIFTRT;
17081 is_vshift = false;
17082 goto do_shift;
17083 case IX86_BUILTIN_PSLLVV16HI:
17084 case IX86_BUILTIN_PSLLVV16SI:
17085 case IX86_BUILTIN_PSLLVV2DI:
17086 case IX86_BUILTIN_PSLLVV2DI_MASK:
17087 case IX86_BUILTIN_PSLLVV32HI:
17088 case IX86_BUILTIN_PSLLVV4DI:
17089 case IX86_BUILTIN_PSLLVV4DI_MASK:
17090 case IX86_BUILTIN_PSLLVV4SI:
17091 case IX86_BUILTIN_PSLLVV4SI_MASK:
17092 case IX86_BUILTIN_PSLLVV8DI:
17093 case IX86_BUILTIN_PSLLVV8HI:
17094 case IX86_BUILTIN_PSLLVV8SI:
17095 case IX86_BUILTIN_PSLLVV8SI_MASK:
17096 rcode = ASHIFT;
17097 is_vshift = true;
17098 goto do_shift;
17099 case IX86_BUILTIN_PSRAVQ128:
17100 case IX86_BUILTIN_PSRAVQ256:
17101 case IX86_BUILTIN_PSRAVV16HI:
17102 case IX86_BUILTIN_PSRAVV16SI:
17103 case IX86_BUILTIN_PSRAVV32HI:
17104 case IX86_BUILTIN_PSRAVV4SI:
17105 case IX86_BUILTIN_PSRAVV4SI_MASK:
17106 case IX86_BUILTIN_PSRAVV8DI:
17107 case IX86_BUILTIN_PSRAVV8HI:
17108 case IX86_BUILTIN_PSRAVV8SI:
17109 case IX86_BUILTIN_PSRAVV8SI_MASK:
17110 rcode = ASHIFTRT;
17111 is_vshift = true;
17112 goto do_shift;
17113 case IX86_BUILTIN_PSRLVV16HI:
17114 case IX86_BUILTIN_PSRLVV16SI:
17115 case IX86_BUILTIN_PSRLVV2DI:
17116 case IX86_BUILTIN_PSRLVV2DI_MASK:
17117 case IX86_BUILTIN_PSRLVV32HI:
17118 case IX86_BUILTIN_PSRLVV4DI:
17119 case IX86_BUILTIN_PSRLVV4DI_MASK:
17120 case IX86_BUILTIN_PSRLVV4SI:
17121 case IX86_BUILTIN_PSRLVV4SI_MASK:
17122 case IX86_BUILTIN_PSRLVV8DI:
17123 case IX86_BUILTIN_PSRLVV8HI:
17124 case IX86_BUILTIN_PSRLVV8SI:
17125 case IX86_BUILTIN_PSRLVV8SI_MASK:
17126 rcode = LSHIFTRT;
17127 is_vshift = true;
17128 goto do_shift;
17129
17130 do_shift:
17131 gcc_assert (n_args >= 2);
17132 if (TREE_CODE (args[0]) != VECTOR_CST)
17133 break;
17134 mask = HOST_WIDE_INT_M1U;
17135 if (n_args > 2)
17136 {
17137 	      /* This is a masked shift; bit I of the mask selects between the shifted element (bit set) and element I of the pass-through operand (bit clear).  */
17138 if (!tree_fits_uhwi_p (args[n_args - 1])
17139 || TREE_SIDE_EFFECTS (args[n_args - 2]))
17140 break;
17141 mask = tree_to_uhwi (args[n_args - 1]);
17142 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
17143 mask |= HOST_WIDE_INT_M1U << elems;
17144 if (mask != HOST_WIDE_INT_M1U
17145 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
17146 break;
17147 if (mask == (HOST_WIDE_INT_M1U << elems))
17148 return args[n_args - 2];
17149 }
17150 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
17151 break;
17152 if (tree tem = (is_vshift ? integer_one_node
17153 : ix86_vector_shift_count (args[1])))
17154 {
17155 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
17156 unsigned HOST_WIDE_INT prec
17157 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
17158 if (count == 0 && mask == HOST_WIDE_INT_M1U)
17159 return args[0];
17160 if (count >= prec)
17161 {
17162 if (rcode == ASHIFTRT)
17163 count = prec - 1;
17164 else if (mask == HOST_WIDE_INT_M1U)
17165 return build_zero_cst (TREE_TYPE (args[0]));
17166 }
17167 tree countt = NULL_TREE;
17168 if (!is_vshift)
17169 {
17170 if (count >= prec)
17171 countt = integer_zero_node;
17172 else
17173 countt = build_int_cst (integer_type_node, count);
17174 }
17175 tree_vector_builder builder;
17176 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
17177 false);
17178 unsigned int cnt = builder.encoded_nelts ();
17179 for (unsigned int i = 0; i < cnt; ++i)
17180 {
17181 tree elt = VECTOR_CST_ELT (args[0], i);
17182 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
17183 return NULL_TREE;
17184 tree type = TREE_TYPE (elt);
17185 if (rcode == LSHIFTRT)
17186 elt = fold_convert (unsigned_type_for (type), elt);
17187 if (is_vshift)
17188 {
17189 countt = VECTOR_CST_ELT (args[1], i);
17190 if (TREE_CODE (countt) != INTEGER_CST
17191 || TREE_OVERFLOW (countt))
17192 return NULL_TREE;
17193 if (wi::neg_p (wi::to_wide (countt))
17194 || wi::to_widest (countt) >= prec)
17195 {
17196 if (rcode == ASHIFTRT)
17197 countt = build_int_cst (TREE_TYPE (countt),
17198 prec - 1);
17199 else
17200 {
17201 elt = build_zero_cst (TREE_TYPE (elt));
17202 countt = build_zero_cst (TREE_TYPE (countt));
17203 }
17204 }
17205 }
17206 else if (count >= prec)
17207 elt = build_zero_cst (TREE_TYPE (elt));
17208 elt = const_binop (rcode == ASHIFT
17209 ? LSHIFT_EXPR : RSHIFT_EXPR,
17210 TREE_TYPE (elt), elt, countt);
17211 if (!elt || TREE_CODE (elt) != INTEGER_CST)
17212 return NULL_TREE;
17213 if (rcode == LSHIFTRT)
17214 elt = fold_convert (type, elt);
17215 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
17216 {
17217 elt = VECTOR_CST_ELT (args[n_args - 2], i);
17218 if (TREE_CODE (elt) != INTEGER_CST
17219 || TREE_OVERFLOW (elt))
17220 return NULL_TREE;
17221 }
17222 builder.quick_push (elt);
17223 }
17224 return builder.build ();
17225 }
17226 break;
17227
17228 default:
17229 break;
17230 }
17231 }
17232
17233 #ifdef SUBTARGET_FOLD_BUILTIN
17234 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17235 #endif
17236
17237 return NULL_TREE;
17238 }
17239
17240 /* Fold a MD builtin (use ix86_fold_builtin for folding into
17241 constant) in GIMPLE. */
17242
17243 bool
17244 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17245 {
17246 gimple *stmt = gsi_stmt (*gsi);
17247 tree fndecl = gimple_call_fndecl (stmt);
17248 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
17249 int n_args = gimple_call_num_args (stmt);
17250 enum ix86_builtins fn_code
17251 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17252 tree decl = NULL_TREE;
17253 tree arg0, arg1, arg2;
17254 enum rtx_code rcode;
17255 unsigned HOST_WIDE_INT count;
17256 bool is_vshift;
17257
17258 switch (fn_code)
17259 {
17260 case IX86_BUILTIN_TZCNT32:
17261 decl = builtin_decl_implicit (BUILT_IN_CTZ);
17262 goto fold_tzcnt_lzcnt;
17263
17264 case IX86_BUILTIN_TZCNT64:
17265 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
17266 goto fold_tzcnt_lzcnt;
17267
17268 case IX86_BUILTIN_LZCNT32:
17269 decl = builtin_decl_implicit (BUILT_IN_CLZ);
17270 goto fold_tzcnt_lzcnt;
17271
17272 case IX86_BUILTIN_LZCNT64:
17273 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
17274 goto fold_tzcnt_lzcnt;
17275
17276 fold_tzcnt_lzcnt:
17277 gcc_assert (n_args == 1);
17278 arg0 = gimple_call_arg (stmt, 0);
17279 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
17280 {
17281 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
17282 	  /* If arg0 is provably non-zero, optimize into the generic
17283 	     __builtin_c[tl]z{,ll} functions, which the middle-end handles
17284 	     better.  */
17285 if (!expr_not_equal_to (arg0, wi::zero (prec)))
17286 return false;
17287
17288 location_t loc = gimple_location (stmt);
17289 gimple *g = gimple_build_call (decl, 1, arg0);
17290 gimple_set_location (g, loc);
17291 tree lhs = make_ssa_name (integer_type_node);
17292 gimple_call_set_lhs (g, lhs);
17293 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17294 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
17295 gimple_set_location (g, loc);
17296 gsi_replace (gsi, g, false);
17297 return true;
17298 }
17299 break;
17300
17301 case IX86_BUILTIN_BZHI32:
17302 case IX86_BUILTIN_BZHI64:
17303 gcc_assert (n_args == 2);
17304 arg1 = gimple_call_arg (stmt, 1);
17305 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
17306 {
17307 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
17308 arg0 = gimple_call_arg (stmt, 0);
17309 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
17310 break;
17311 location_t loc = gimple_location (stmt);
17312 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17313 gimple_set_location (g, loc);
17314 gsi_replace (gsi, g, false);
17315 return true;
17316 }
17317 break;
17318
17319 case IX86_BUILTIN_PDEP32:
17320 case IX86_BUILTIN_PDEP64:
17321 case IX86_BUILTIN_PEXT32:
17322 case IX86_BUILTIN_PEXT64:
17323 gcc_assert (n_args == 2);
17324 arg1 = gimple_call_arg (stmt, 1);
17325 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
17326 {
17327 location_t loc = gimple_location (stmt);
17328 arg0 = gimple_call_arg (stmt, 0);
17329 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17330 gimple_set_location (g, loc);
17331 gsi_replace (gsi, g, false);
17332 return true;
17333 }
17334 break;
17335
17336 case IX86_BUILTIN_PSLLD:
17337 case IX86_BUILTIN_PSLLD128:
17338 case IX86_BUILTIN_PSLLD128_MASK:
17339 case IX86_BUILTIN_PSLLD256:
17340 case IX86_BUILTIN_PSLLD256_MASK:
17341 case IX86_BUILTIN_PSLLD512:
17342 case IX86_BUILTIN_PSLLDI:
17343 case IX86_BUILTIN_PSLLDI128:
17344 case IX86_BUILTIN_PSLLDI128_MASK:
17345 case IX86_BUILTIN_PSLLDI256:
17346 case IX86_BUILTIN_PSLLDI256_MASK:
17347 case IX86_BUILTIN_PSLLDI512:
17348 case IX86_BUILTIN_PSLLQ:
17349 case IX86_BUILTIN_PSLLQ128:
17350 case IX86_BUILTIN_PSLLQ128_MASK:
17351 case IX86_BUILTIN_PSLLQ256:
17352 case IX86_BUILTIN_PSLLQ256_MASK:
17353 case IX86_BUILTIN_PSLLQ512:
17354 case IX86_BUILTIN_PSLLQI:
17355 case IX86_BUILTIN_PSLLQI128:
17356 case IX86_BUILTIN_PSLLQI128_MASK:
17357 case IX86_BUILTIN_PSLLQI256:
17358 case IX86_BUILTIN_PSLLQI256_MASK:
17359 case IX86_BUILTIN_PSLLQI512:
17360 case IX86_BUILTIN_PSLLW:
17361 case IX86_BUILTIN_PSLLW128:
17362 case IX86_BUILTIN_PSLLW128_MASK:
17363 case IX86_BUILTIN_PSLLW256:
17364 case IX86_BUILTIN_PSLLW256_MASK:
17365 case IX86_BUILTIN_PSLLW512_MASK:
17366 case IX86_BUILTIN_PSLLWI:
17367 case IX86_BUILTIN_PSLLWI128:
17368 case IX86_BUILTIN_PSLLWI128_MASK:
17369 case IX86_BUILTIN_PSLLWI256:
17370 case IX86_BUILTIN_PSLLWI256_MASK:
17371 case IX86_BUILTIN_PSLLWI512_MASK:
17372 rcode = ASHIFT;
17373 is_vshift = false;
17374 goto do_shift;
17375 case IX86_BUILTIN_PSRAD:
17376 case IX86_BUILTIN_PSRAD128:
17377 case IX86_BUILTIN_PSRAD128_MASK:
17378 case IX86_BUILTIN_PSRAD256:
17379 case IX86_BUILTIN_PSRAD256_MASK:
17380 case IX86_BUILTIN_PSRAD512:
17381 case IX86_BUILTIN_PSRADI:
17382 case IX86_BUILTIN_PSRADI128:
17383 case IX86_BUILTIN_PSRADI128_MASK:
17384 case IX86_BUILTIN_PSRADI256:
17385 case IX86_BUILTIN_PSRADI256_MASK:
17386 case IX86_BUILTIN_PSRADI512:
17387 case IX86_BUILTIN_PSRAQ128_MASK:
17388 case IX86_BUILTIN_PSRAQ256_MASK:
17389 case IX86_BUILTIN_PSRAQ512:
17390 case IX86_BUILTIN_PSRAQI128_MASK:
17391 case IX86_BUILTIN_PSRAQI256_MASK:
17392 case IX86_BUILTIN_PSRAQI512:
17393 case IX86_BUILTIN_PSRAW:
17394 case IX86_BUILTIN_PSRAW128:
17395 case IX86_BUILTIN_PSRAW128_MASK:
17396 case IX86_BUILTIN_PSRAW256:
17397 case IX86_BUILTIN_PSRAW256_MASK:
17398 case IX86_BUILTIN_PSRAW512:
17399 case IX86_BUILTIN_PSRAWI:
17400 case IX86_BUILTIN_PSRAWI128:
17401 case IX86_BUILTIN_PSRAWI128_MASK:
17402 case IX86_BUILTIN_PSRAWI256:
17403 case IX86_BUILTIN_PSRAWI256_MASK:
17404 case IX86_BUILTIN_PSRAWI512:
17405 rcode = ASHIFTRT;
17406 is_vshift = false;
17407 goto do_shift;
17408 case IX86_BUILTIN_PSRLD:
17409 case IX86_BUILTIN_PSRLD128:
17410 case IX86_BUILTIN_PSRLD128_MASK:
17411 case IX86_BUILTIN_PSRLD256:
17412 case IX86_BUILTIN_PSRLD256_MASK:
17413 case IX86_BUILTIN_PSRLD512:
17414 case IX86_BUILTIN_PSRLDI:
17415 case IX86_BUILTIN_PSRLDI128:
17416 case IX86_BUILTIN_PSRLDI128_MASK:
17417 case IX86_BUILTIN_PSRLDI256:
17418 case IX86_BUILTIN_PSRLDI256_MASK:
17419 case IX86_BUILTIN_PSRLDI512:
17420 case IX86_BUILTIN_PSRLQ:
17421 case IX86_BUILTIN_PSRLQ128:
17422 case IX86_BUILTIN_PSRLQ128_MASK:
17423 case IX86_BUILTIN_PSRLQ256:
17424 case IX86_BUILTIN_PSRLQ256_MASK:
17425 case IX86_BUILTIN_PSRLQ512:
17426 case IX86_BUILTIN_PSRLQI:
17427 case IX86_BUILTIN_PSRLQI128:
17428 case IX86_BUILTIN_PSRLQI128_MASK:
17429 case IX86_BUILTIN_PSRLQI256:
17430 case IX86_BUILTIN_PSRLQI256_MASK:
17431 case IX86_BUILTIN_PSRLQI512:
17432 case IX86_BUILTIN_PSRLW:
17433 case IX86_BUILTIN_PSRLW128:
17434 case IX86_BUILTIN_PSRLW128_MASK:
17435 case IX86_BUILTIN_PSRLW256:
17436 case IX86_BUILTIN_PSRLW256_MASK:
17437 case IX86_BUILTIN_PSRLW512:
17438 case IX86_BUILTIN_PSRLWI:
17439 case IX86_BUILTIN_PSRLWI128:
17440 case IX86_BUILTIN_PSRLWI128_MASK:
17441 case IX86_BUILTIN_PSRLWI256:
17442 case IX86_BUILTIN_PSRLWI256_MASK:
17443 case IX86_BUILTIN_PSRLWI512:
17444 rcode = LSHIFTRT;
17445 is_vshift = false;
17446 goto do_shift;
17447 case IX86_BUILTIN_PSLLVV16HI:
17448 case IX86_BUILTIN_PSLLVV16SI:
17449 case IX86_BUILTIN_PSLLVV2DI:
17450 case IX86_BUILTIN_PSLLVV2DI_MASK:
17451 case IX86_BUILTIN_PSLLVV32HI:
17452 case IX86_BUILTIN_PSLLVV4DI:
17453 case IX86_BUILTIN_PSLLVV4DI_MASK:
17454 case IX86_BUILTIN_PSLLVV4SI:
17455 case IX86_BUILTIN_PSLLVV4SI_MASK:
17456 case IX86_BUILTIN_PSLLVV8DI:
17457 case IX86_BUILTIN_PSLLVV8HI:
17458 case IX86_BUILTIN_PSLLVV8SI:
17459 case IX86_BUILTIN_PSLLVV8SI_MASK:
17460 rcode = ASHIFT;
17461 is_vshift = true;
17462 goto do_shift;
17463 case IX86_BUILTIN_PSRAVQ128:
17464 case IX86_BUILTIN_PSRAVQ256:
17465 case IX86_BUILTIN_PSRAVV16HI:
17466 case IX86_BUILTIN_PSRAVV16SI:
17467 case IX86_BUILTIN_PSRAVV32HI:
17468 case IX86_BUILTIN_PSRAVV4SI:
17469 case IX86_BUILTIN_PSRAVV4SI_MASK:
17470 case IX86_BUILTIN_PSRAVV8DI:
17471 case IX86_BUILTIN_PSRAVV8HI:
17472 case IX86_BUILTIN_PSRAVV8SI:
17473 case IX86_BUILTIN_PSRAVV8SI_MASK:
17474 rcode = ASHIFTRT;
17475 is_vshift = true;
17476 goto do_shift;
17477 case IX86_BUILTIN_PSRLVV16HI:
17478 case IX86_BUILTIN_PSRLVV16SI:
17479 case IX86_BUILTIN_PSRLVV2DI:
17480 case IX86_BUILTIN_PSRLVV2DI_MASK:
17481 case IX86_BUILTIN_PSRLVV32HI:
17482 case IX86_BUILTIN_PSRLVV4DI:
17483 case IX86_BUILTIN_PSRLVV4DI_MASK:
17484 case IX86_BUILTIN_PSRLVV4SI:
17485 case IX86_BUILTIN_PSRLVV4SI_MASK:
17486 case IX86_BUILTIN_PSRLVV8DI:
17487 case IX86_BUILTIN_PSRLVV8HI:
17488 case IX86_BUILTIN_PSRLVV8SI:
17489 case IX86_BUILTIN_PSRLVV8SI_MASK:
17490 rcode = LSHIFTRT;
17491 is_vshift = true;
17492 goto do_shift;
17493
17494 do_shift:
17495 gcc_assert (n_args >= 2);
17496 arg0 = gimple_call_arg (stmt, 0);
17497 arg1 = gimple_call_arg (stmt, 1);
17498 if (n_args > 2)
17499 {
17500 	  /* This is a masked shift.  Only optimize if the mask is all ones.  */
17501 tree argl = gimple_call_arg (stmt, n_args - 1);
17502 if (!tree_fits_uhwi_p (argl))
17503 break;
17504 unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
17505 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
17506 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
17507 break;
17508 }
17509 if (is_vshift)
17510 {
17511 if (TREE_CODE (arg1) != VECTOR_CST)
17512 break;
17513 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
17514 if (integer_zerop (arg1))
17515 count = 0;
17516 else if (rcode == ASHIFTRT)
17517 break;
17518 else
17519 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
17520 {
17521 tree elt = VECTOR_CST_ELT (arg1, i);
17522 if (!wi::neg_p (wi::to_wide (elt))
17523 && wi::to_widest (elt) < count)
17524 return false;
17525 }
17526 }
17527 else
17528 {
17529 arg1 = ix86_vector_shift_count (arg1);
17530 if (!arg1)
17531 break;
17532 count = tree_to_uhwi (arg1);
17533 }
17534 if (count == 0)
17535 {
17536 /* Just return the first argument for shift by 0. */
17537 location_t loc = gimple_location (stmt);
17538 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17539 gimple_set_location (g, loc);
17540 gsi_replace (gsi, g, false);
17541 return true;
17542 }
17543 if (rcode != ASHIFTRT
17544 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
17545 {
17546 	  /* For shift counts equal to or greater than the precision, the result
17547 	     is zero, except for arithmetic right shifts.  */
17548 location_t loc = gimple_location (stmt);
17549 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17550 build_zero_cst (TREE_TYPE (arg0)));
17551 gimple_set_location (g, loc);
17552 gsi_replace (gsi, g, false);
17553 return true;
17554 }
17555 break;
17556
17557 case IX86_BUILTIN_SHUFPD:
17558 arg2 = gimple_call_arg (stmt, 2);
17559 if (TREE_CODE (arg2) == INTEGER_CST)
17560 {
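/* shufpd selects element (imm & 1) of the first operand and element
   ((imm >> 1) & 1) of the second, so the equivalent VEC_PERM_EXPR mask
   is { imm & 1, 2 + ((imm >> 1) & 1) }.  */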
17561 location_t loc = gimple_location (stmt);
17562 unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
17563 arg0 = gimple_call_arg (stmt, 0);
17564 arg1 = gimple_call_arg (stmt, 1);
17565 tree itype = long_long_integer_type_node;
17566 tree vtype = build_vector_type (itype, 2); /* V2DI */
17567 tree_vector_builder elts (vtype, 2, 1);
17568 /* Ignore bits other than the lowest 2. */
17569 elts.quick_push (build_int_cst (itype, imask & 1));
17570 imask >>= 1;
17571 elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
17572 tree omask = elts.build ();
17573 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17574 VEC_PERM_EXPR,
17575 arg0, arg1, omask);
17576 gimple_set_location (g, loc);
17577 gsi_replace (gsi, g, false);
17578 return true;
17579 }
17580 // Do not error yet, the constant could be propagated later?
17581 break;
17582
17583 default:
17584 break;
17585 }
17586
17587 return false;
17588 }
17589
17590 /* Handler for an SVML-style interface to
17591 a library with vectorized intrinsics. */
17592
17593 tree
17594 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
17595 {
17596 char name[20];
17597 tree fntype, new_fndecl, args;
17598 unsigned arity;
17599 const char *bname;
17600 machine_mode el_mode, in_mode;
17601 int n, in_n;
17602
17603   /* The SVML library is suitable for unsafe math only.  */
17604 if (!flag_unsafe_math_optimizations)
17605 return NULL_TREE;
17606
17607 el_mode = TYPE_MODE (TREE_TYPE (type_out));
17608 n = TYPE_VECTOR_SUBPARTS (type_out);
17609 in_mode = TYPE_MODE (TREE_TYPE (type_in));
17610 in_n = TYPE_VECTOR_SUBPARTS (type_in);
17611 if (el_mode != in_mode
17612 || n != in_n)
17613 return NULL_TREE;
17614
17615 switch (fn)
17616 {
17617 CASE_CFN_EXP:
17618 CASE_CFN_LOG:
17619 CASE_CFN_LOG10:
17620 CASE_CFN_POW:
17621 CASE_CFN_TANH:
17622 CASE_CFN_TAN:
17623 CASE_CFN_ATAN:
17624 CASE_CFN_ATAN2:
17625 CASE_CFN_ATANH:
17626 CASE_CFN_CBRT:
17627 CASE_CFN_SINH:
17628 CASE_CFN_SIN:
17629 CASE_CFN_ASINH:
17630 CASE_CFN_ASIN:
17631 CASE_CFN_COSH:
17632 CASE_CFN_COS:
17633 CASE_CFN_ACOSH:
17634 CASE_CFN_ACOS:
17635 if ((el_mode != DFmode || n != 2)
17636 && (el_mode != SFmode || n != 4))
17637 return NULL_TREE;
17638 break;
17639
17640 default:
17641 return NULL_TREE;
17642 }
17643
17644 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
17645 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
17646
17647 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
17648 strcpy (name, "vmlsLn4");
17649 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
17650 strcpy (name, "vmldLn2");
17651 else if (n == 4)
17652 {
17653 sprintf (name, "vmls%s", bname+10);
17654 name[strlen (name)-1] = '4';
17655 }
17656 else
17657 sprintf (name, "vmld%s2", bname+10);
17658
17659 /* Convert to uppercase. */
17660 name[4] &= ~0x20;
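/* E.g. sinf maps to vmlsSin4 and sin maps to vmldSin2.  */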
17661
17662 arity = 0;
17663 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
17664 arity++;
17665
17666 if (arity == 1)
17667 fntype = build_function_type_list (type_out, type_in, NULL);
17668 else
17669 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
17670
17671 /* Build a function declaration for the vectorized function. */
17672 new_fndecl = build_decl (BUILTINS_LOCATION,
17673 FUNCTION_DECL, get_identifier (name), fntype);
17674 TREE_PUBLIC (new_fndecl) = 1;
17675 DECL_EXTERNAL (new_fndecl) = 1;
17676 DECL_IS_NOVOPS (new_fndecl) = 1;
17677 TREE_READONLY (new_fndecl) = 1;
17678
17679 return new_fndecl;
17680 }
17681
17682 /* Handler for an ACML-style interface to
17683 a library with vectorized intrinsics. */
17684
17685 tree
17686 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
17687 {
17688 char name[20] = "__vr.._";
17689 tree fntype, new_fndecl, args;
17690 unsigned arity;
17691 const char *bname;
17692 machine_mode el_mode, in_mode;
17693 int n, in_n;
17694
17695   /* The ACML library is 64-bit only and suitable for unsafe math only, as
17696      it does not correctly support parts of IEEE arithmetic, such as
17697      denormals, with the required precision.  */
17698 if (!TARGET_64BIT
17699 || !flag_unsafe_math_optimizations)
17700 return NULL_TREE;
17701
17702 el_mode = TYPE_MODE (TREE_TYPE (type_out));
17703 n = TYPE_VECTOR_SUBPARTS (type_out);
17704 in_mode = TYPE_MODE (TREE_TYPE (type_in));
17705 in_n = TYPE_VECTOR_SUBPARTS (type_in);
17706 if (el_mode != in_mode
17707 || n != in_n)
17708 return NULL_TREE;
17709
17710 switch (fn)
17711 {
17712 CASE_CFN_SIN:
17713 CASE_CFN_COS:
17714 CASE_CFN_EXP:
17715 CASE_CFN_LOG:
17716 CASE_CFN_LOG2:
17717 CASE_CFN_LOG10:
17718 if (el_mode == DFmode && n == 2)
17719 {
17720 name[4] = 'd';
17721 name[5] = '2';
17722 }
17723 else if (el_mode == SFmode && n == 4)
17724 {
17725 name[4] = 's';
17726 name[5] = '4';
17727 }
17728 else
17729 return NULL_TREE;
17730 break;
17731
17732 default:
17733 return NULL_TREE;
17734 }
17735
17736 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
17737 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
17738 sprintf (name + 7, "%s", bname+10);
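/* E.g. sin maps to __vrd2_sin and sinf maps to __vrs4_sinf.  */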
17739
17740 arity = 0;
17741 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
17742 arity++;
17743
17744 if (arity == 1)
17745 fntype = build_function_type_list (type_out, type_in, NULL);
17746 else
17747 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
17748
17749 /* Build a function declaration for the vectorized function. */
17750 new_fndecl = build_decl (BUILTINS_LOCATION,
17751 FUNCTION_DECL, get_identifier (name), fntype);
17752 TREE_PUBLIC (new_fndecl) = 1;
17753 DECL_EXTERNAL (new_fndecl) = 1;
17754 DECL_IS_NOVOPS (new_fndecl) = 1;
17755 TREE_READONLY (new_fndecl) = 1;
17756
17757 return new_fndecl;
17758 }
17759
17760 /* Returns a decl of a function that implements scatter store with
17761 register type VECTYPE and index type INDEX_TYPE and SCALE.
17762 Return NULL_TREE if it is not available. */
17763
17764 static tree
17765 ix86_vectorize_builtin_scatter (const_tree vectype,
17766 const_tree index_type, int scale)
17767 {
17768 bool si;
17769 enum ix86_builtins code;
17770
17771 if (!TARGET_AVX512F)
17772 return NULL_TREE;
17773
17774 if ((TREE_CODE (index_type) != INTEGER_TYPE
17775 && !POINTER_TYPE_P (index_type))
17776 || (TYPE_MODE (index_type) != SImode
17777 && TYPE_MODE (index_type) != DImode))
17778 return NULL_TREE;
17779
17780 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
17781 return NULL_TREE;
17782
17783   /* The v*scatter* insns sign-extend the index to pointer mode.  */
17784 if (TYPE_PRECISION (index_type) < POINTER_SIZE
17785 && TYPE_UNSIGNED (index_type))
17786 return NULL_TREE;
17787
17788 /* Scale can be 1, 2, 4 or 8. */
17789 if (scale <= 0
17790 || scale > 8
17791 || (scale & (scale - 1)) != 0)
17792 return NULL_TREE;
17793
17794 si = TYPE_MODE (index_type) == SImode;
17795 switch (TYPE_MODE (vectype))
17796 {
17797 case E_V8DFmode:
17798 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
17799 break;
17800 case E_V8DImode:
17801 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
17802 break;
17803 case E_V16SFmode:
17804 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
17805 break;
17806 case E_V16SImode:
17807 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
17808 break;
17809 case E_V4DFmode:
17810 if (TARGET_AVX512VL)
17811 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
17812 else
17813 return NULL_TREE;
17814 break;
17815 case E_V4DImode:
17816 if (TARGET_AVX512VL)
17817 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
17818 else
17819 return NULL_TREE;
17820 break;
17821 case E_V8SFmode:
17822 if (TARGET_AVX512VL)
17823 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
17824 else
17825 return NULL_TREE;
17826 break;
17827 case E_V8SImode:
17828 if (TARGET_AVX512VL)
17829 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
17830 else
17831 return NULL_TREE;
17832 break;
17833 case E_V2DFmode:
17834 if (TARGET_AVX512VL)
17835 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
17836 else
17837 return NULL_TREE;
17838 break;
17839 case E_V2DImode:
17840 if (TARGET_AVX512VL)
17841 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
17842 else
17843 return NULL_TREE;
17844 break;
17845 case E_V4SFmode:
17846 if (TARGET_AVX512VL)
17847 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
17848 else
17849 return NULL_TREE;
17850 break;
17851 case E_V4SImode:
17852 if (TARGET_AVX512VL)
17853 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
17854 else
17855 return NULL_TREE;
17856 break;
17857 default:
17858 return NULL_TREE;
17859 }
17860
17861 return get_ix86_builtin (code);
17862 }
17863
17864 /* Return true if it is safe to use the rsqrt optabs to optimize
17865 1.0/sqrt. */
17866
17867 static bool
17868 use_rsqrt_p ()
17869 {
17870 return (TARGET_SSE && TARGET_SSE_MATH
17871 && flag_finite_math_only
17872 && !flag_trapping_math
17873 && flag_unsafe_math_optimizations);
17874 }
17875 \f
17876 /* Helper for avx_vpermilps256_operand et al. This is also used by
17877 the expansion functions to turn the parallel back into a mask.
17878 The return value is 0 for no match and the imm8+1 for a match. */
17879
17880 int
17881 avx_vpermilp_parallel (rtx par, machine_mode mode)
17882 {
17883 unsigned i, nelt = GET_MODE_NUNITS (mode);
17884 unsigned mask = 0;
17885 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
17886
17887 if (XVECLEN (par, 0) != (int) nelt)
17888 return 0;
17889
17890 /* Validate that all of the elements are constants, and not totally
17891 out of range. Copy the data into an integral array to make the
17892 subsequent checks easier. */
17893 for (i = 0; i < nelt; ++i)
17894 {
17895 rtx er = XVECEXP (par, 0, i);
17896 unsigned HOST_WIDE_INT ei;
17897
17898 if (!CONST_INT_P (er))
17899 return 0;
17900 ei = INTVAL (er);
17901 if (ei >= nelt)
17902 return 0;
17903 ipar[i] = ei;
17904 }
17905
17906 switch (mode)
17907 {
17908 case E_V8DFmode:
17909 /* In the 512-bit DFmode case, we can only move elements within
17910 a 128-bit lane. First fill the second part of the mask,
17911 then fallthru. */
17912 for (i = 4; i < 6; ++i)
17913 {
17914 if (ipar[i] < 4 || ipar[i] >= 6)
17915 return 0;
17916 mask |= (ipar[i] - 4) << i;
17917 }
17918 for (i = 6; i < 8; ++i)
17919 {
17920 if (ipar[i] < 6)
17921 return 0;
17922 mask |= (ipar[i] - 6) << i;
17923 }
17924 /* FALLTHRU */
17925
17926 case E_V4DFmode:
17927 /* In the 256-bit DFmode case, we can only move elements within
17928 a 128-bit lane. */
17929 for (i = 0; i < 2; ++i)
17930 {
17931 if (ipar[i] >= 2)
17932 return 0;
17933 mask |= ipar[i] << i;
17934 }
17935 for (i = 2; i < 4; ++i)
17936 {
17937 if (ipar[i] < 2)
17938 return 0;
17939 mask |= (ipar[i] - 2) << i;
17940 }
17941 break;
17942
17943 case E_V16SFmode:
17944       /* In the 512-bit SFmode case, the permutation in the upper 256 bits
17945 	 must mirror the permutation in the lower 256 bits.  */
17946 for (i = 0; i < 8; ++i)
17947 if (ipar[i] + 8 != ipar[i + 8])
17948 return 0;
17949 /* FALLTHRU */
17950
17951 case E_V8SFmode:
17952       /* In the 256-bit SFmode case, we have full freedom of
17953 movement within the low 128-bit lane, but the high 128-bit
17954 lane must mirror the exact same pattern. */
17955 for (i = 0; i < 4; ++i)
17956 if (ipar[i] + 4 != ipar[i + 4])
17957 return 0;
17958 nelt = 4;
17959 /* FALLTHRU */
17960
17961 case E_V2DFmode:
17962 case E_V4SFmode:
17963       /* In the 128-bit case, we have full freedom in the placement of
17964 the elements from the source operand. */
17965 for (i = 0; i < nelt; ++i)
17966 mask |= ipar[i] << (i * (nelt / 2));
17967 break;
17968
17969 default:
17970 gcc_unreachable ();
17971 }
17972
17973 /* Make sure success has a non-zero value by adding one. */
17974 return mask + 1;
17975 }
17976
17977 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
17978 the expansion functions to turn the parallel back into a mask.
17979 The return value is 0 for no match and the imm8+1 for a match. */
17980
17981 int
17982 avx_vperm2f128_parallel (rtx par, machine_mode mode)
17983 {
17984 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
17985 unsigned mask = 0;
17986 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
17987
17988 if (XVECLEN (par, 0) != (int) nelt)
17989 return 0;
17990
17991 /* Validate that all of the elements are constants, and not totally
17992 out of range. Copy the data into an integral array to make the
17993 subsequent checks easier. */
17994 for (i = 0; i < nelt; ++i)
17995 {
17996 rtx er = XVECEXP (par, 0, i);
17997 unsigned HOST_WIDE_INT ei;
17998
17999 if (!CONST_INT_P (er))
18000 return 0;
18001 ei = INTVAL (er);
18002 if (ei >= 2 * nelt)
18003 return 0;
18004 ipar[i] = ei;
18005 }
18006
18007   /* Validate that each half of the permute consists of consecutive elements.  */
18008 for (i = 0; i < nelt2 - 1; ++i)
18009 if (ipar[i] + 1 != ipar[i + 1])
18010 return 0;
18011 for (i = nelt2; i < nelt - 1; ++i)
18012 if (ipar[i] + 1 != ipar[i + 1])
18013 return 0;
18014
18015 /* Reconstruct the mask. */
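/* Each selected 128-bit half yields a selector in the range 0-3; the
   selector for result lane 0 goes in bits 0-3 of the immediate and the
   one for lane 1 in bits 4-7.  */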
18016 for (i = 0; i < 2; ++i)
18017 {
18018 unsigned e = ipar[i * nelt2];
18019 if (e % nelt2)
18020 return 0;
18021 e /= nelt2;
18022 mask |= e << (i * 4);
18023 }
18024
18025 /* Make sure success has a non-zero value by adding one. */
18026 return mask + 1;
18027 }
18028 \f
18029 /* Return a register priority for hard reg REGNO. */
18030 static int
18031 ix86_register_priority (int hard_regno)
18032 {
18033   /* ebp and r13 as a base always want a displacement, and r12 as a
18034      base always wants an index.  So discourage their use in an
18035      address.  */
18036 if (hard_regno == R12_REG || hard_regno == R13_REG)
18037 return 0;
18038 if (hard_regno == BP_REG)
18039 return 1;
18040 /* New x86-64 int registers result in bigger code size. Discourage
18041 them. */
18042 if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
18043 return 2;
18044 /* New x86-64 SSE registers result in bigger code size. Discourage
18045 them. */
18046 if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
18047 return 2;
18048 if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
18049 return 1;
18050   /* Use of the AX register results in smaller code.  Prefer it.  */
18051 if (hard_regno == AX_REG)
18052 return 4;
18053 return 3;
18054 }
18055
18056 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18057
18058 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18059 QImode must go into class Q_REGS.
18060 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18061 movdf to do mem-to-mem moves through integer regs. */
18062
18063 static reg_class_t
18064 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
18065 {
18066 machine_mode mode = GET_MODE (x);
18067
18068 /* We're only allowed to return a subclass of CLASS. Many of the
18069 following checks fail for NO_REGS, so eliminate that early. */
18070 if (regclass == NO_REGS)
18071 return NO_REGS;
18072
18073 /* All classes can load zeros. */
18074 if (x == CONST0_RTX (mode))
18075 return regclass;
18076
18077 /* Force constants into memory if we are loading a (nonzero) constant into
18078 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18079 instructions to load from a constant. */
18080 if (CONSTANT_P (x)
18081 && (MAYBE_MMX_CLASS_P (regclass)
18082 || MAYBE_SSE_CLASS_P (regclass)
18083 || MAYBE_MASK_CLASS_P (regclass)))
18084 return NO_REGS;
18085
18086 /* Floating-point constants need more complex checks. */
18087 if (CONST_DOUBLE_P (x))
18088 {
18089 /* General regs can load everything. */
18090 if (INTEGER_CLASS_P (regclass))
18091 return regclass;
18092
18093 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18094 zero above. We only want to wind up preferring 80387 registers if
18095 we plan on doing computation with them. */
18096 if (IS_STACK_MODE (mode)
18097 && standard_80387_constant_p (x) > 0)
18098 {
18099 /* Limit class to FP regs. */
18100 if (FLOAT_CLASS_P (regclass))
18101 return FLOAT_REGS;
18102 }
18103
18104 return NO_REGS;
18105 }
18106
18107 /* Prefer SSE regs only, if we can use them for math. */
18108 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18109 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
18110
18111 /* Generally when we see PLUS here, it's the function invariant
18112 (plus soft-fp const_int). Which can only be computed into general
18113 regs. */
18114 if (GET_CODE (x) == PLUS)
18115 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
18116
18117 /* QImode constants are easy to load, but non-constant QImode data
18118 must go into Q_REGS. */
18119 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18120 {
18121 if (Q_CLASS_P (regclass))
18122 return regclass;
18123 else if (reg_class_subset_p (Q_REGS, regclass))
18124 return Q_REGS;
18125 else
18126 return NO_REGS;
18127 }
18128
18129 return regclass;
18130 }
18131
18132 /* Discourage putting floating-point values in SSE registers unless
18133 SSE math is being used, and likewise for the 387 registers. */
18134 static reg_class_t
18135 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
18136 {
18137 /* Restrict the output reload class to the register bank that we are doing
18138 math on. If we would like not to return a subset of CLASS, reject this
18139 alternative: if reload cannot do this, it will still use its choice. */
18140 machine_mode mode = GET_MODE (x);
18141 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18142 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
18143
18144 if (IS_STACK_MODE (mode))
18145 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
18146
18147 return regclass;
18148 }
18149
18150 static reg_class_t
18151 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
18152 machine_mode mode, secondary_reload_info *sri)
18153 {
18154 /* Double-word spills from general registers to non-offsettable memory
18155 references (zero-extended addresses) require special handling. */
18156 if (TARGET_64BIT
18157 && MEM_P (x)
18158 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
18159 && INTEGER_CLASS_P (rclass)
18160 && !offsettable_memref_p (x))
18161 {
18162 sri->icode = (in_p
18163 ? CODE_FOR_reload_noff_load
18164 : CODE_FOR_reload_noff_store);
18165 /* Add the cost of moving address to a temporary. */
18166 sri->extra_cost = 1;
18167
18168 return NO_REGS;
18169 }
18170
18171   /* QImode spills from non-QI registers require an
18172      intermediate register on 32-bit targets.  */
18173 if (mode == QImode
18174 && ((!TARGET_64BIT && !in_p
18175 && INTEGER_CLASS_P (rclass)
18176 && MAYBE_NON_Q_CLASS_P (rclass))
18177 || (!TARGET_AVX512DQ
18178 && MAYBE_MASK_CLASS_P (rclass))))
18179 {
18180 int regno = true_regnum (x);
18181
18182 /* Return Q_REGS if the operand is in memory. */
18183 if (regno == -1)
18184 return Q_REGS;
18185
18186 return NO_REGS;
18187 }
18188
18189   /* This condition handles a corner case where an expression involving
18190 pointers gets vectorized. We're trying to use the address of a
18191 stack slot as a vector initializer.
18192
18193 (set (reg:V2DI 74 [ vect_cst_.2 ])
18194 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18195
18196 Eventually frame gets turned into sp+offset like this:
18197
18198 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18199 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18200 (const_int 392 [0x188]))))
18201
18202 That later gets turned into:
18203
18204 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18205 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18206 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18207
18208 We'll have the following reload recorded:
18209
18210 Reload 0: reload_in (DI) =
18211 (plus:DI (reg/f:DI 7 sp)
18212 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18213 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18214 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18215 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18216 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18217 reload_reg_rtx: (reg:V2DI 22 xmm1)
18218
18219 Which isn't going to work since SSE instructions can't handle scalar
18220 additions. Returning GENERAL_REGS forces the addition into integer
18221 register and reload can handle subsequent reloads without problems. */
18222
18223 if (in_p && GET_CODE (x) == PLUS
18224 && SSE_CLASS_P (rclass)
18225 && SCALAR_INT_MODE_P (mode))
18226 return GENERAL_REGS;
18227
18228 return NO_REGS;
18229 }
18230
18231 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18232
18233 static bool
18234 ix86_class_likely_spilled_p (reg_class_t rclass)
18235 {
18236 switch (rclass)
18237 {
18238 case AREG:
18239 case DREG:
18240 case CREG:
18241 case BREG:
18242 case AD_REGS:
18243 case SIREG:
18244 case DIREG:
18245 case SSE_FIRST_REG:
18246 case FP_TOP_REG:
18247 case FP_SECOND_REG:
18248 return true;
18249
18250 default:
18251 break;
18252 }
18253
18254 return false;
18255 }
18256
18257 /* If we are copying between registers from different register sets
18258 (e.g. FP and integer), we may need a memory location.
18259
18260 The function can't work reliably when one of the CLASSES is a class
18261 containing registers from multiple sets. We avoid this by never combining
18262 different sets in a single alternative in the machine description.
18263 Ensure that this constraint holds to avoid unexpected surprises.
18264
18265 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18266 so do not enforce these sanity checks.
18267
18268 To optimize register_move_cost performance, define inline variant. */
18269
18270 static inline bool
18271 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18272 reg_class_t class2, int strict)
18273 {
18274 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
18275 return false;
18276
18277 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18278 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18279 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18280 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18281 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18282 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
18283 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
18284 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
18285 {
18286 gcc_assert (!strict || lra_in_progress);
18287 return true;
18288 }
18289
18290 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18291 return true;
18292
18293 /* Between mask and general, we have moves no larger than word size. */
18294 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
18295 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
18296 return true;
18297
18298 /* ??? This is a lie. We do have moves between mmx/general, and for
18299 mmx/sse2. But by saying we need secondary memory we discourage the
18300 register allocator from using the mmx registers unless needed. */
18301 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18302 return true;
18303
18304 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18305 {
18306 /* SSE1 doesn't have any direct moves from other classes. */
18307 if (!TARGET_SSE2)
18308 return true;
18309
18310 /* If the target says that inter-unit moves are more expensive
18311 than moving through memory, then don't generate them. */
18312 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
18313 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
18314 return true;
18315
18316 /* Between SSE and general, we have moves no larger than word size. */
18317 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18318 return true;
18319 }
18320
18321 return false;
18322 }
18323
18324 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18325
18326 static bool
18327 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18328 reg_class_t class2)
18329 {
18330 return inline_secondary_memory_needed (mode, class1, class2, true);
18331 }
18332
18333 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18334
18335 get_secondary_mem widens integral modes to BITS_PER_WORD.
18336    There is no need to emit a full 64-bit move on 64-bit targets
18337    for integral modes that can be moved using a 32-bit move.  */
18338
18339 static machine_mode
18340 ix86_secondary_memory_needed_mode (machine_mode mode)
18341 {
18342 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
18343 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
18344 return mode;
18345 }
18346
18347 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18348
18349 On the 80386, this is the size of MODE in words,
18350 except in the FP regs, where a single reg is always enough. */
18351
18352 static unsigned char
18353 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
18354 {
18355 if (MAYBE_INTEGER_CLASS_P (rclass))
18356 {
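/* An 80-bit XFmode value needs 3 general registers with -m32
   and 2 with -m64.  */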
18357 if (mode == XFmode)
18358 return (TARGET_64BIT ? 2 : 3);
18359 else if (mode == XCmode)
18360 return (TARGET_64BIT ? 4 : 6);
18361 else
18362 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18363 }
18364 else
18365 {
18366 if (COMPLEX_MODE_P (mode))
18367 return 2;
18368 else
18369 return 1;
18370 }
18371 }
18372
18373 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
18374
18375 static bool
18376 ix86_can_change_mode_class (machine_mode from, machine_mode to,
18377 reg_class_t regclass)
18378 {
18379 if (from == to)
18380 return true;
18381
18382 /* x87 registers can't do subreg at all, as all values are reformatted
18383 to extended precision. */
18384 if (MAYBE_FLOAT_CLASS_P (regclass))
18385 return false;
18386
18387 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
18388 {
18389 /* Vector registers do not support QI or HImode loads. If we don't
18390 disallow a change to these modes, reload will assume it's ok to
18391 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18392 the vec_dupv4hi pattern. */
18393 if (GET_MODE_SIZE (from) < 4)
18394 return false;
18395 }
18396
18397 return true;
18398 }
18399
18400 /* Return index of MODE in the sse load/store tables. */
18401
18402 static inline int
18403 sse_store_index (machine_mode mode)
18404 {
18405 switch (GET_MODE_SIZE (mode))
18406 {
18407 case 4:
18408 return 0;
18409 case 8:
18410 return 1;
18411 case 16:
18412 return 2;
18413 case 32:
18414 return 3;
18415 case 64:
18416 return 4;
18417 default:
18418 return -1;
18419 }
18420 }
18421
18422 /* Return the cost of moving data of mode M between a
18423 register and memory. A value of 2 is the default; this cost is
18424 relative to those in `REGISTER_MOVE_COST'.
18425
18426    This function is used extensively by register_move_cost, which is used to
18427    build tables at startup, so it is defined inline here.
18428    When IN is 2, return the maximum of the in and out move costs.
18429
18430 If moving between registers and memory is more expensive than
18431 between two registers, you should define this macro to express the
18432 relative cost.
18433
18434    Also model the increased cost of moving QImode registers in
18435    non-Q_REGS classes.
18436 */
18437 static inline int
18438 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
18439 {
18440 int cost;
18441 if (FLOAT_CLASS_P (regclass))
18442 {
18443 int index;
18444 switch (mode)
18445 {
18446 case E_SFmode:
18447 index = 0;
18448 break;
18449 case E_DFmode:
18450 index = 1;
18451 break;
18452 case E_XFmode:
18453 index = 2;
18454 break;
18455 default:
18456 return 100;
18457 }
18458 if (in == 2)
18459 return MAX (ix86_cost->hard_register.fp_load [index],
18460 ix86_cost->hard_register.fp_store [index]);
18461 return in ? ix86_cost->hard_register.fp_load [index]
18462 : ix86_cost->hard_register.fp_store [index];
18463 }
18464 if (SSE_CLASS_P (regclass))
18465 {
18466 int index = sse_store_index (mode);
18467 if (index == -1)
18468 return 100;
18469 if (in == 2)
18470 return MAX (ix86_cost->hard_register.sse_load [index],
18471 ix86_cost->hard_register.sse_store [index]);
18472 return in ? ix86_cost->hard_register.sse_load [index]
18473 : ix86_cost->hard_register.sse_store [index];
18474 }
18475 if (MMX_CLASS_P (regclass))
18476 {
18477 int index;
18478 switch (GET_MODE_SIZE (mode))
18479 {
18480 case 4:
18481 index = 0;
18482 break;
18483 case 8:
18484 index = 1;
18485 break;
18486 default:
18487 return 100;
18488 }
18489 if (in == 2)
18490 return MAX (ix86_cost->hard_register.mmx_load [index],
18491 ix86_cost->hard_register.mmx_store [index]);
18492 return in ? ix86_cost->hard_register.mmx_load [index]
18493 : ix86_cost->hard_register.mmx_store [index];
18494 }
18495 switch (GET_MODE_SIZE (mode))
18496 {
18497 case 1:
18498 if (Q_CLASS_P (regclass) || TARGET_64BIT)
18499 {
18500 if (!in)
18501 return ix86_cost->hard_register.int_store[0];
18502 if (TARGET_PARTIAL_REG_DEPENDENCY
18503 && optimize_function_for_speed_p (cfun))
18504 cost = ix86_cost->hard_register.movzbl_load;
18505 else
18506 cost = ix86_cost->hard_register.int_load[0];
18507 if (in == 2)
18508 return MAX (cost, ix86_cost->hard_register.int_store[0]);
18509 return cost;
18510 }
18511 else
18512 {
18513 if (in == 2)
18514 return MAX (ix86_cost->hard_register.movzbl_load,
18515 ix86_cost->hard_register.int_store[0] + 4);
18516 if (in)
18517 return ix86_cost->hard_register.movzbl_load;
18518 else
18519 return ix86_cost->hard_register.int_store[0] + 4;
18520 }
18521 break;
18522 case 2:
18523 if (in == 2)
18524 return MAX (ix86_cost->hard_register.int_load[1],
18525 ix86_cost->hard_register.int_store[1]);
18526 return in ? ix86_cost->hard_register.int_load[1]
18527 : ix86_cost->hard_register.int_store[1];
18528 default:
18529 if (in == 2)
18530 cost = MAX (ix86_cost->hard_register.int_load[2],
18531 ix86_cost->hard_register.int_store[2]);
18532 else if (in)
18533 cost = ix86_cost->hard_register.int_load[2];
18534 else
18535 cost = ix86_cost->hard_register.int_store[2];
18536       /* Multiply by the number of GPR moves needed.  */
18537 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
18538 }
18539 }
18540
18541 static int
18542 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
18543 {
18544 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
18545 }
18546
18547
18548 /* Return the cost of moving data from a register in class CLASS1 to
18549 one in class CLASS2.
18550
18551 It is not required that the cost always equal 2 when FROM is the same as TO;
18552 on some machines it is expensive to move between registers if they are not
18553 general registers. */
18554
18555 static int
18556 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
18557 reg_class_t class2_i)
18558 {
18559 enum reg_class class1 = (enum reg_class) class1_i;
18560 enum reg_class class2 = (enum reg_class) class2_i;
18561
18562   /* In case we require secondary memory, compute the cost of the store
18563      followed by the load.  To avoid bad register allocation choices, we
18564      need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
18565
18566 if (inline_secondary_memory_needed (mode, class1, class2, false))
18567 {
18568 int cost = 1;
18569
18570 cost += inline_memory_move_cost (mode, class1, 2);
18571 cost += inline_memory_move_cost (mode, class2, 2);
18572
18573       /* In the case of copying from a general purpose register we may emit
18574 	 multiple stores followed by a single load, causing a memory size
18575 	 mismatch stall.  Count this as an arbitrarily high cost of 20.  */
18576 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
18577 && TARGET_MEMORY_MISMATCH_STALL
18578 && targetm.class_max_nregs (class1, mode)
18579 > targetm.class_max_nregs (class2, mode))
18580 cost += 20;
18581
18582 /* In the case of FP/MMX moves, the registers actually overlap, and we
18583 have to switch modes in order to treat them differently. */
18584 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18585 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18586 cost += 20;
18587
18588 return cost;
18589 }
18590
18591 /* Moves between MMX and non-MMX units require secondary memory. */
18592 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18593 gcc_unreachable ();
18594
18595 /* Moves between SSE and integer units are expensive. */
18596 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18597
18598     /* ??? By keeping the returned value relatively high, we limit the number
18599        of moves between integer and SSE registers for all targets.
18600        Additionally, a high value prevents a problem with x86_modes_tieable_p (),
18601        where integer modes in SSE registers are not tieable
18602        because of missing QImode and HImode moves to, from or between
18603        MMX/SSE registers.  */
18604 return MAX (8, SSE_CLASS_P (class1)
18605 ? ix86_cost->hard_register.sse_to_integer
18606 : ix86_cost->hard_register.integer_to_sse);
18607
18608 if (MAYBE_FLOAT_CLASS_P (class1))
18609 return ix86_cost->hard_register.fp_move;
18610 if (MAYBE_SSE_CLASS_P (class1))
18611 {
18612 if (GET_MODE_BITSIZE (mode) <= 128)
18613 return ix86_cost->hard_register.xmm_move;
18614 if (GET_MODE_BITSIZE (mode) <= 256)
18615 return ix86_cost->hard_register.ymm_move;
18616 return ix86_cost->hard_register.zmm_move;
18617 }
18618 if (MAYBE_MMX_CLASS_P (class1))
18619 return ix86_cost->hard_register.mmx_move;
18620 return 2;
18621 }
18622
18623 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
18624 words of a value of mode MODE but can be less for certain modes in
18625 special long registers.
18626
18627 Actually there are no two word move instructions for consecutive
18628 registers. And only registers 0-3 may have mov byte instructions
18629 applied to them. */
18630
18631 static unsigned int
18632 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
18633 {
18634 if (GENERAL_REGNO_P (regno))
18635 {
18636 if (mode == XFmode)
18637 return TARGET_64BIT ? 2 : 3;
18638 if (mode == XCmode)
18639 return TARGET_64BIT ? 4 : 6;
18640 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18641 }
18642 if (COMPLEX_MODE_P (mode))
18643 return 2;
18644 /* Register pair for mask registers. */
18645 if (mode == P2QImode || mode == P2HImode)
18646 return 2;
18647 if (mode == V64SFmode || mode == V64SImode)
18648 return 4;
18649 return 1;
18650 }
18651
18652 /* Implement REGMODE_NATURAL_SIZE(MODE). */
18653 unsigned int
18654 ix86_regmode_natural_size (machine_mode mode)
18655 {
18656 if (mode == P2HImode || mode == P2QImode)
18657 return GET_MODE_SIZE (mode) / 2;
18658 return UNITS_PER_WORD;
18659 }
18660
18661 /* Implement TARGET_HARD_REGNO_MODE_OK. */
18662
18663 static bool
18664 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
18665 {
18666   /* Only the flags registers can hold CCmode values, and they can hold only CCmode values.  */
18667 if (CC_REGNO_P (regno))
18668 return GET_MODE_CLASS (mode) == MODE_CC;
18669 if (GET_MODE_CLASS (mode) == MODE_CC
18670 || GET_MODE_CLASS (mode) == MODE_RANDOM)
18671 return false;
18672 if (STACK_REGNO_P (regno))
18673 return VALID_FP_MODE_P (mode);
18674 if (MASK_REGNO_P (regno))
18675 {
18676 /* Register pair only starts at even register number. */
18677 if ((mode == P2QImode || mode == P2HImode))
18678 return MASK_PAIR_REGNO_P(regno);
18679
18680 return (VALID_MASK_REG_MODE (mode)
18681 || (TARGET_AVX512BW
18682 && VALID_MASK_AVX512BW_MODE (mode)));
18683 }
18684
18685 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18686 return false;
18687
18688 if (SSE_REGNO_P (regno))
18689 {
18690 /* We implement the move patterns for all vector modes into and
18691 out of SSE registers, even when no operation instructions
18692 are available. */
18693
18694 /* For AVX-512 we allow, regardless of regno:
18695 - XI mode
18696 - any 512-bit wide vector mode
18697 - any scalar mode. */
18698 if (TARGET_AVX512F
18699 && (mode == XImode
18700 || VALID_AVX512F_REG_MODE (mode)
18701 || VALID_AVX512F_SCALAR_MODE (mode)))
18702 return true;
18703
18704 /* For AVX-5124FMAPS or AVX-5124VNNIW
18705 allow V64SF and V64SI modes for special regnos. */
18706 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
18707 && (mode == V64SFmode || mode == V64SImode)
18708 && MOD4_SSE_REGNO_P (regno))
18709 return true;
18710
18711 /* TODO check for QI/HI scalars. */
18712 /* AVX512VL allows SSE regs 16+ for 128/256 bit modes. */
18713 if (TARGET_AVX512VL
18714 && (mode == OImode
18715 || mode == TImode
18716 || VALID_AVX256_REG_MODE (mode)
18717 || VALID_AVX512VL_128_REG_MODE (mode)))
18718 return true;
18719
18720 /* xmm16-xmm31 are only available for AVX-512. */
18721 if (EXT_REX_SSE_REGNO_P (regno))
18722 return false;
18723
18724 /* OImode and AVX modes are available only when AVX is enabled. */
18725 return ((TARGET_AVX
18726 && VALID_AVX256_REG_OR_OI_MODE (mode))
18727 || VALID_SSE_REG_MODE (mode)
18728 || VALID_SSE2_REG_MODE (mode)
18729 || VALID_MMX_REG_MODE (mode)
18730 || VALID_MMX_REG_MODE_3DNOW (mode));
18731 }
18732 if (MMX_REGNO_P (regno))
18733 {
18734 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18735 so if the register is available at all, then we can move data of
18736 the given mode into or out of it. */
18737 return (VALID_MMX_REG_MODE (mode)
18738 || VALID_MMX_REG_MODE_3DNOW (mode));
18739 }
18740
18741 if (mode == QImode)
18742 {
18743 /* Take care with QImode values - they can live in non-QI regs,
18744 but then they do cause partial register stalls. */
18745 if (ANY_QI_REGNO_P (regno))
18746 return true;
18747 if (!TARGET_PARTIAL_REG_STALL)
18748 return true;
18749 /* LRA checks if the hard register is OK for the given mode.
18750 QImode values can live in non-QI regs, so we allow all
18751 registers here. */
18752 if (lra_in_progress)
18753 return true;
18754 return !can_create_pseudo_p ();
18755 }
18756 /* We handle both integers and floats in the general purpose registers. */
18757 else if (VALID_INT_MODE_P (mode))
18758 return true;
18759 else if (VALID_FP_MODE_P (mode))
18760 return true;
18761 else if (VALID_DFP_MODE_P (mode))
18762 return true;
18763 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18764 on to use that value in smaller contexts, this can easily force a
18765 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18766 supporting DImode, allow it. */
18767 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18768 return true;
18769
18770 return false;
18771 }
18772
18773 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
18774 saves SSE registers across calls is Win64 (thus no need to check the
18775 current ABI here), and with AVX enabled Win64 only guarantees that
18776 the low 16 bytes are saved. */
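/* Consequently any SSE-class value wider than 16 bytes (e.g. the upper
   half of a YMM register) is treated as clobbered across calls, while a
   plain 16-byte XMM value is considered preserved under Win64.  */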
18777
18778 static bool
18779 ix86_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
18780 unsigned int regno, machine_mode mode)
18781 {
18782 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
18783 }
18784
18785 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18786 tieable integer mode. */
18787
18788 static bool
18789 ix86_tieable_integer_mode_p (machine_mode mode)
18790 {
18791 switch (mode)
18792 {
18793 case E_HImode:
18794 case E_SImode:
18795 return true;
18796
18797 case E_QImode:
18798 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18799
18800 case E_DImode:
18801 return TARGET_64BIT;
18802
18803 default:
18804 return false;
18805 }
18806 }
18807
18808 /* Implement TARGET_MODES_TIEABLE_P.
18809
18810 Return true if MODE1 is accessible in a register that can hold MODE2
18811 without copying. That is, all register classes that can hold MODE2
18812 can also hold MODE1. */
18813
18814 static bool
18815 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
18816 {
18817 if (mode1 == mode2)
18818 return true;
18819
18820 if (ix86_tieable_integer_mode_p (mode1)
18821 && ix86_tieable_integer_mode_p (mode2))
18822 return true;
18823
18824 /* MODE2 being XFmode implies fp stack or general regs, which means we
18825 can tie any smaller floating point modes to it. Note that we do not
18826 tie this with TFmode. */
18827 if (mode2 == XFmode)
18828 return mode1 == SFmode || mode1 == DFmode;
18829
18830 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18831 that we can tie it with SFmode. */
18832 if (mode2 == DFmode)
18833 return mode1 == SFmode;
18834
18835 /* If MODE2 is only appropriate for an SSE register, then tie with
18836 any other mode acceptable to SSE registers. */
18837 if (GET_MODE_SIZE (mode2) == 64
18838 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18839 return (GET_MODE_SIZE (mode1) == 64
18840 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18841 if (GET_MODE_SIZE (mode2) == 32
18842 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18843 return (GET_MODE_SIZE (mode1) == 32
18844 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18845 if (GET_MODE_SIZE (mode2) == 16
18846 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18847 return (GET_MODE_SIZE (mode1) == 16
18848 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18849
18850 /* If MODE2 is appropriate for an MMX register, then tie
18851 with any other mode acceptable to MMX registers. */
18852 if (GET_MODE_SIZE (mode2) == 8
18853 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18854 return (GET_MODE_SIZE (mode1) == 8
18855 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18856
18857 return false;
18858 }
18859
18860 /* Return the cost of moving between two registers of mode MODE. */
18861
18862 static int
18863 ix86_set_reg_reg_cost (machine_mode mode)
18864 {
18865 unsigned int units = UNITS_PER_WORD;
18866
18867 switch (GET_MODE_CLASS (mode))
18868 {
18869 default:
18870 break;
18871
18872 case MODE_CC:
18873 units = GET_MODE_SIZE (CCmode);
18874 break;
18875
18876 case MODE_FLOAT:
18877 if ((TARGET_SSE && mode == TFmode)
18878 || (TARGET_80387 && mode == XFmode)
18879 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
18880 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
18881 units = GET_MODE_SIZE (mode);
18882 break;
18883
18884 case MODE_COMPLEX_FLOAT:
18885 if ((TARGET_SSE && mode == TCmode)
18886 || (TARGET_80387 && mode == XCmode)
18887 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
18888 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
18889 units = GET_MODE_SIZE (mode);
18890 break;
18891
18892 case MODE_VECTOR_INT:
18893 case MODE_VECTOR_FLOAT:
18894 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
18895 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
18896 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18897 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18898 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
18899 && VALID_MMX_REG_MODE (mode)))
18900 units = GET_MODE_SIZE (mode);
18901 }
18902
18903 /* Return the cost of moving between two registers of mode MODE,
18904 assuming that the move will be in pieces of at most UNITS bytes. */
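  /* For instance, a DImode (8-byte) register-register copy on 32-bit with
     no FP/SSE/MMX case matched above keeps units == UNITS_PER_WORD == 4,
     so it is costed as CEIL (8, 4) == 2, i.e. COSTS_N_INSNS (2).  */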
18905 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
18906 }
18907
18908 /* Return cost of vector operation in MODE given that scalar version has
18909 COST. */
18910
18911 static int
18912 ix86_vec_cost (machine_mode mode, int cost)
18913 {
18914 if (!VECTOR_MODE_P (mode))
18915 return cost;
18916
18917 if (GET_MODE_BITSIZE (mode) == 128
18918 && TARGET_SSE_SPLIT_REGS)
18919 return cost * 2;
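  /* When the tune prefers 128-bit vector operations (TARGET_AVX128_OPTIMAL),
     a wider operation is modelled as its 128-bit halves below: a 256-bit op
     costs 2 * COST and a 512-bit op costs 4 * COST.  */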
18920 if (GET_MODE_BITSIZE (mode) > 128
18921 && TARGET_AVX128_OPTIMAL)
18922 return cost * GET_MODE_BITSIZE (mode) / 128;
18923 return cost;
18924 }
18925
18926 /* Return cost of multiplication in MODE. */
18927
18928 static int
18929 ix86_multiplication_cost (const struct processor_costs *cost,
18930 enum machine_mode mode)
18931 {
18932 machine_mode inner_mode = mode;
18933 if (VECTOR_MODE_P (mode))
18934 inner_mode = GET_MODE_INNER (mode);
18935
18936 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18937 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
18938 else if (X87_FLOAT_MODE_P (mode))
18939 return cost->fmul;
18940 else if (FLOAT_MODE_P (mode))
18941 return ix86_vec_cost (mode,
18942 inner_mode == DFmode ? cost->mulsd : cost->mulss);
18943 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18944 {
18945 /* vpmullq is used in this case. No emulation is needed. */
18946 if (TARGET_AVX512DQ)
18947 return ix86_vec_cost (mode, cost->mulss);
18948
18949 /* V*QImode is emulated with 7-13 insns. */
18950 if (mode == V16QImode || mode == V32QImode)
18951 {
18952 int extra = 11;
18953 if (TARGET_XOP && mode == V16QImode)
18954 extra = 5;
18955 else if (TARGET_SSSE3)
18956 extra = 6;
18957 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
18958 }
18959 /* V*DImode is emulated with 5-8 insns. */
18960 else if (mode == V2DImode || mode == V4DImode)
18961 {
18962 if (TARGET_XOP && mode == V2DImode)
18963 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
18964 else
18965 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
18966 }
18967 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
18968 insns, including two PMULUDQ. */
18969 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
18970 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
18971 else
18972 return ix86_vec_cost (mode, cost->mulss);
18973 }
18974 else
18975 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
18976 }
18977
18978 /* Return cost of division in MODE. */
18979
18980 static int
18981 ix86_division_cost (const struct processor_costs *cost,
18982 enum machine_mode mode)
18983 {
18984 machine_mode inner_mode = mode;
18985 if (VECTOR_MODE_P (mode))
18986 inner_mode = GET_MODE_INNER (mode);
18987
18988 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18989 return inner_mode == DFmode ? cost->divsd : cost->divss;
18990 else if (X87_FLOAT_MODE_P (mode))
18991 return cost->fdiv;
18992 else if (FLOAT_MODE_P (mode))
18993 return ix86_vec_cost (mode,
18994 inner_mode == DFmode ? cost->divsd : cost->divss);
18995 else
18996 return cost->divide[MODE_INDEX (mode)];
18997 }
18998
18999 #define COSTS_N_BYTES(N) ((N) * 2)
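/* (Assuming the usual rtl.h scale of COSTS_N_INSNS (N) == (N) * 4, this
   makes COSTS_N_BYTES (2) equivalent to COSTS_N_INSNS (1), so byte counts
   can be combined with the insn-based costs used below.)  */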
19000
19001 /* Return cost of shift in MODE.
19002 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19003 AND_IN_OP1 specifies whether op1 is the result of an AND, and
19004 SHIFT_AND_TRUNCATE whether op1 is a subreg of such an AND.
19005
19006 SKIP_OP0/1 are set to true if the cost of OP0/1 should be ignored. */
19007
19008 static int
19009 ix86_shift_rotate_cost (const struct processor_costs *cost,
19010 enum machine_mode mode, bool constant_op1,
19011 HOST_WIDE_INT op1_val,
19012 bool speed,
19013 bool and_in_op1,
19014 bool shift_and_truncate,
19015 bool *skip_op0, bool *skip_op1)
19016 {
19017 if (skip_op0)
19018 *skip_op0 = *skip_op1 = false;
19019 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19020 {
19021 /* V*QImode is emulated with 1-11 insns. */
19022 if (mode == V16QImode || mode == V32QImode)
19023 {
19024 int count = 11;
19025 if (TARGET_XOP && mode == V16QImode)
19026 {
19027 /* For XOP we use vpshab, which requires a broadcast of the
19028 value to the variable shift insn. For constants this
19029 means a V16Q const in mem; even when we can perform the
19030 shift with one insn, set the cost to prefer paddb. */
19031 if (constant_op1)
19032 {
19033 if (skip_op1)
19034 *skip_op1 = true;
19035 return ix86_vec_cost (mode,
19036 cost->sse_op
19037 + (speed
19038 ? 2
19039 : COSTS_N_BYTES
19040 (GET_MODE_UNIT_SIZE (mode))));
19041 }
19042 count = 3;
19043 }
19044 else if (TARGET_SSSE3)
19045 count = 7;
19046 return ix86_vec_cost (mode, cost->sse_op * count);
19047 }
19048 else
19049 return ix86_vec_cost (mode, cost->sse_op);
19050 }
19051 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19052 {
19053 if (constant_op1)
19054 {
19055 if (op1_val > 32)
19056 return cost->shift_const + COSTS_N_INSNS (2);
19057 else
19058 return cost->shift_const * 2;
19059 }
19060 else
19061 {
19062 if (and_in_op1)
19063 return cost->shift_var * 2;
19064 else
19065 return cost->shift_var * 6 + COSTS_N_INSNS (2);
19066 }
19067 }
19068 else
19069 {
19070 if (constant_op1)
19071 return cost->shift_const;
19072 else if (shift_and_truncate)
19073 {
19074 if (skip_op0)
19075 *skip_op0 = *skip_op1 = true;
19076 /* Return the cost after shift-and truncation. */
19077 return cost->shift_var;
19078 }
19079 else
19080 return cost->shift_var;
19081 }
19082 return cost->shift_const;
19083 }
19084
19085 /* Compute a (partial) cost for rtx X. Return true if the complete
19086 cost has been computed, and false if subexpressions should be
19087 scanned. In either case, *TOTAL contains the cost result. */
19088
19089 static bool
19090 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
19091 int *total, bool speed)
19092 {
19093 rtx mask;
19094 enum rtx_code code = GET_CODE (x);
19095 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
19096 const struct processor_costs *cost
19097 = speed ? ix86_tune_cost : &ix86_size_cost;
19098 int src_cost;
19099
19100 switch (code)
19101 {
19102 case SET:
19103 if (register_operand (SET_DEST (x), VOIDmode)
19104 && register_operand (SET_SRC (x), VOIDmode))
19105 {
19106 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
19107 return true;
19108 }
19109
19110 if (register_operand (SET_SRC (x), VOIDmode))
19111 /* Avoid potentially incorrect high cost from rtx_costs
19112 for non-tieable SUBREGs. */
19113 src_cost = 0;
19114 else
19115 {
19116 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
19117
19118 if (CONSTANT_P (SET_SRC (x)))
19119 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19120 a small value, possibly zero for cheap constants. */
19121 src_cost += COSTS_N_INSNS (1);
19122 }
19123
19124 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
19125 return true;
19126
19127 case CONST_INT:
19128 case CONST:
19129 case LABEL_REF:
19130 case SYMBOL_REF:
19131 if (x86_64_immediate_operand (x, VOIDmode))
19132 *total = 0;
19133 else
19134 *total = 1;
19135 return true;
19136
19137 case CONST_DOUBLE:
19138 if (IS_STACK_MODE (mode))
19139 switch (standard_80387_constant_p (x))
19140 {
19141 case -1:
19142 case 0:
19143 break;
19144 case 1: /* 0.0 */
19145 *total = 1;
19146 return true;
19147 default: /* Other constants */
19148 *total = 2;
19149 return true;
19150 }
19151 /* FALLTHRU */
19152
19153 case CONST_VECTOR:
19154 switch (standard_sse_constant_p (x, mode))
19155 {
19156 case 0:
19157 break;
19158 case 1: /* 0: xor eliminates false dependency */
19159 *total = 0;
19160 return true;
19161 default: /* -1: cmp contains false dependency */
19162 *total = 1;
19163 return true;
19164 }
19165 /* FALLTHRU */
19166
19167 case CONST_WIDE_INT:
19168 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19169 it'll probably end up. Add a penalty for size. */
19170 *total = (COSTS_N_INSNS (1)
19171 + (!TARGET_64BIT && flag_pic)
19172 + (GET_MODE_SIZE (mode) <= 4
19173 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
19174 return true;
19175
19176 case ZERO_EXTEND:
19177 /* The zero extension is often completely free on x86_64, so make
19178 it as cheap as possible. */
19179 if (TARGET_64BIT && mode == DImode
19180 && GET_MODE (XEXP (x, 0)) == SImode)
19181 *total = 1;
19182 else if (TARGET_ZERO_EXTEND_WITH_AND)
19183 *total = cost->add;
19184 else
19185 *total = cost->movzx;
19186 return false;
19187
19188 case SIGN_EXTEND:
19189 *total = cost->movsx;
19190 return false;
19191
19192 case ASHIFT:
19193 if (SCALAR_INT_MODE_P (mode)
19194 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
19195 && CONST_INT_P (XEXP (x, 1)))
19196 {
19197 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19198 if (value == 1)
19199 {
19200 *total = cost->add;
19201 return false;
19202 }
19203 if ((value == 2 || value == 3)
19204 && cost->lea <= cost->shift_const)
19205 {
19206 *total = cost->lea;
19207 return false;
19208 }
19209 }
19210 /* FALLTHRU */
19211
19212 case ROTATE:
19213 case ASHIFTRT:
19214 case LSHIFTRT:
19215 case ROTATERT:
19216 bool skip_op0, skip_op1;
19217 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
19218 CONST_INT_P (XEXP (x, 1))
19219 ? INTVAL (XEXP (x, 1)) : -1,
19220 speed,
19221 GET_CODE (XEXP (x, 1)) == AND,
19222 SUBREG_P (XEXP (x, 1))
19223 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
19224 &skip_op0, &skip_op1);
19225 if (skip_op0 || skip_op1)
19226 {
19227 if (!skip_op0)
19228 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
19229 if (!skip_op1)
19230 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
19231 return true;
19232 }
19233 return false;
19234
19235 case FMA:
19236 {
19237 rtx sub;
19238
19239 gcc_assert (FLOAT_MODE_P (mode));
19240 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
19241
19242 *total = ix86_vec_cost (mode,
19243 GET_MODE_INNER (mode) == SFmode
19244 ? cost->fmass : cost->fmasd);
19245 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
19246
19247 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19248 sub = XEXP (x, 0);
19249 if (GET_CODE (sub) == NEG)
19250 sub = XEXP (sub, 0);
19251 *total += rtx_cost (sub, mode, FMA, 0, speed);
19252
19253 sub = XEXP (x, 2);
19254 if (GET_CODE (sub) == NEG)
19255 sub = XEXP (sub, 0);
19256 *total += rtx_cost (sub, mode, FMA, 2, speed);
19257 return true;
19258 }
19259
19260 case MULT:
19261 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
19262 {
19263 rtx op0 = XEXP (x, 0);
19264 rtx op1 = XEXP (x, 1);
19265 int nbits;
19266 if (CONST_INT_P (XEXP (x, 1)))
19267 {
19268 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19269 for (nbits = 0; value != 0; value &= value - 1)
19270 nbits++;
19271 }
19272 else
19273 /* This is arbitrary. */
19274 nbits = 7;
19275
19276 /* Compute costs correctly for widening multiplication. */
19277 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19278 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19279 == GET_MODE_SIZE (mode))
19280 {
19281 int is_mulwiden = 0;
19282 machine_mode inner_mode = GET_MODE (op0);
19283
19284 if (GET_CODE (op0) == GET_CODE (op1))
19285 is_mulwiden = 1, op1 = XEXP (op1, 0);
19286 else if (CONST_INT_P (op1))
19287 {
19288 if (GET_CODE (op0) == SIGN_EXTEND)
19289 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19290 == INTVAL (op1);
19291 else
19292 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19293 }
19294
19295 if (is_mulwiden)
19296 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19297 }
19298
19299 *total = (cost->mult_init[MODE_INDEX (mode)]
19300 + nbits * cost->mult_bit
19301 + rtx_cost (op0, mode, outer_code, opno, speed)
19302 + rtx_cost (op1, mode, outer_code, opno, speed));
19303
19304 return true;
19305 }
19306 *total = ix86_multiplication_cost (cost, mode);
19307 return false;
19308
19309 case DIV:
19310 case UDIV:
19311 case MOD:
19312 case UMOD:
19313 *total = ix86_division_cost (cost, mode);
19314 return false;
19315
19316 case PLUS:
19317 if (GET_MODE_CLASS (mode) == MODE_INT
19318 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
19319 {
19320 if (GET_CODE (XEXP (x, 0)) == PLUS
19321 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19322 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19323 && CONSTANT_P (XEXP (x, 1)))
19324 {
19325 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19326 if (val == 2 || val == 4 || val == 8)
19327 {
19328 *total = cost->lea;
19329 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19330 outer_code, opno, speed);
19331 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
19332 outer_code, opno, speed);
19333 *total += rtx_cost (XEXP (x, 1), mode,
19334 outer_code, opno, speed);
19335 return true;
19336 }
19337 }
19338 else if (GET_CODE (XEXP (x, 0)) == MULT
19339 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19340 {
19341 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19342 if (val == 2 || val == 4 || val == 8)
19343 {
19344 *total = cost->lea;
19345 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19346 outer_code, opno, speed);
19347 *total += rtx_cost (XEXP (x, 1), mode,
19348 outer_code, opno, speed);
19349 return true;
19350 }
19351 }
19352 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19353 {
19354 /* Add with carry, ignore the cost of adding a carry flag. */
19355 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
19356 *total = cost->add;
19357 else
19358 {
19359 *total = cost->lea;
19360 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19361 outer_code, opno, speed);
19362 }
19363
19364 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19365 outer_code, opno, speed);
19366 *total += rtx_cost (XEXP (x, 1), mode,
19367 outer_code, opno, speed);
19368 return true;
19369 }
19370 }
19371 /* FALLTHRU */
19372
19373 case MINUS:
19374 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
19375 if (GET_MODE_CLASS (mode) == MODE_INT
19376 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
19377 && GET_CODE (XEXP (x, 0)) == MINUS
19378 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
19379 {
19380 *total = cost->add;
19381 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19382 outer_code, opno, speed);
19383 *total += rtx_cost (XEXP (x, 1), mode,
19384 outer_code, opno, speed);
19385 return true;
19386 }
19387
19388 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19389 {
19390 *total = cost->addss;
19391 return false;
19392 }
19393 else if (X87_FLOAT_MODE_P (mode))
19394 {
19395 *total = cost->fadd;
19396 return false;
19397 }
19398 else if (FLOAT_MODE_P (mode))
19399 {
19400 *total = ix86_vec_cost (mode, cost->addss);
19401 return false;
19402 }
19403 /* FALLTHRU */
19404
19405 case AND:
19406 case IOR:
19407 case XOR:
19408 if (GET_MODE_CLASS (mode) == MODE_INT
19409 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19410 {
19411 *total = (cost->add * 2
19412 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
19413 << (GET_MODE (XEXP (x, 0)) != DImode))
19414 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
19415 << (GET_MODE (XEXP (x, 1)) != DImode)));
19416 return true;
19417 }
19418 /* FALLTHRU */
19419
19420 case NEG:
19421 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19422 {
19423 *total = cost->sse_op;
19424 return false;
19425 }
19426 else if (X87_FLOAT_MODE_P (mode))
19427 {
19428 *total = cost->fchs;
19429 return false;
19430 }
19431 else if (FLOAT_MODE_P (mode))
19432 {
19433 *total = ix86_vec_cost (mode, cost->sse_op);
19434 return false;
19435 }
19436 /* FALLTHRU */
19437
19438 case NOT:
19439 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19440 *total = ix86_vec_cost (mode, cost->sse_op);
19441 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19442 *total = cost->add * 2;
19443 else
19444 *total = cost->add;
19445 return false;
19446
19447 case COMPARE:
19448 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19449 && XEXP (XEXP (x, 0), 1) == const1_rtx
19450 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19451 && XEXP (x, 1) == const0_rtx)
19452 {
19453 /* This kind of construct is implemented using test[bwl].
19454 Treat it as if we had an AND. */
19455 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
19456 *total = (cost->add
19457 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
19458 opno, speed)
19459 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
19460 return true;
19461 }
19462
19463 /* The embedded comparison operand is completely free. */
19464 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
19465 && XEXP (x, 1) == const0_rtx)
19466 *total = 0;
19467
19468 return false;
19469
19470 case FLOAT_EXTEND:
19471 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19472 *total = 0;
19473 else
19474 *total = ix86_vec_cost (mode, cost->addss);
19475 return false;
19476
19477 case FLOAT_TRUNCATE:
19478 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19479 *total = cost->fadd;
19480 else
19481 *total = ix86_vec_cost (mode, cost->addss);
19482 return false;
19483
19484 case ABS:
19485 /* SSE requires memory load for the constant operand. It may make
19486 sense to account for this. Of course the constant operand may or
19487 may not be reused. */
19488 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19489 *total = cost->sse_op;
19490 else if (X87_FLOAT_MODE_P (mode))
19491 *total = cost->fabs;
19492 else if (FLOAT_MODE_P (mode))
19493 *total = ix86_vec_cost (mode, cost->sse_op);
19494 return false;
19495
19496 case SQRT:
19497 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19498 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
19499 else if (X87_FLOAT_MODE_P (mode))
19500 *total = cost->fsqrt;
19501 else if (FLOAT_MODE_P (mode))
19502 *total = ix86_vec_cost (mode,
19503 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
19504 return false;
19505
19506 case UNSPEC:
19507 if (XINT (x, 1) == UNSPEC_TP)
19508 *total = 0;
19509 return false;
19510
19511 case VEC_SELECT:
19512 case VEC_CONCAT:
19513 case VEC_DUPLICATE:
19514 /* ??? Assume all of these vector manipulation patterns are
19515 recognizable. In which case they all pretty much have the
19516 same cost. */
19517 *total = cost->sse_op;
19518 return true;
19519 case VEC_MERGE:
19520 mask = XEXP (x, 2);
19521 /* This is a masked instruction; assume the same cost
19522 as the non-masked variant. */
19523 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
19524 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
19525 else
19526 *total = cost->sse_op;
19527 return true;
19528
19529 default:
19530 return false;
19531 }
19532 }
19533
19534 #if TARGET_MACHO
19535
19536 static int current_machopic_label_num;
19537
19538 /* Given a symbol name and its associated stub, write out the
19539 definition of the stub. */
19540
19541 void
19542 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19543 {
19544 unsigned int length;
19545 char *binder_name, *symbol_name, lazy_ptr_name[32];
19546 int label = ++current_machopic_label_num;
19547
19548 /* For 64-bit we shouldn't get here. */
19549 gcc_assert (!TARGET_64BIT);
19550
19551 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19552 symb = targetm.strip_name_encoding (symb);
19553
19554 length = strlen (stub);
19555 binder_name = XALLOCAVEC (char, length + 32);
19556 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19557
19558 length = strlen (symb);
19559 symbol_name = XALLOCAVEC (char, length + 32);
19560 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19561
19562 sprintf (lazy_ptr_name, "L%d$lz", label);
19563
19564 if (MACHOPIC_ATT_STUB)
19565 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
19566 else if (MACHOPIC_PURE)
19567 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
19568 else
19569 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19570
19571 fprintf (file, "%s:\n", stub);
19572 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19573
19574 if (MACHOPIC_ATT_STUB)
19575 {
19576 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
19577 }
19578 else if (MACHOPIC_PURE)
19579 {
19580 /* PIC stub. */
19581 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19582 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19583 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
19584 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
19585 label, lazy_ptr_name, label);
19586 fprintf (file, "\tjmp\t*%%ecx\n");
19587 }
19588 else
19589 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19590
19591 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
19592 it needs no stub-binding-helper. */
19593 if (MACHOPIC_ATT_STUB)
19594 return;
19595
19596 fprintf (file, "%s:\n", binder_name);
19597
19598 if (MACHOPIC_PURE)
19599 {
19600 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
19601 fprintf (file, "\tpushl\t%%ecx\n");
19602 }
19603 else
19604 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19605
19606 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
19607
19608 /* N.B. Keep the correspondence of these
19609 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
19610 old-pic/new-pic/non-pic stubs; altering this will break
19611 compatibility with existing dylibs. */
19612 if (MACHOPIC_PURE)
19613 {
19614 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19615 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
19616 }
19617 else
19618 /* 16-byte -mdynamic-no-pic stub. */
19619 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
19620
19621 fprintf (file, "%s:\n", lazy_ptr_name);
19622 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19623 fprintf (file, ASM_LONG "%s\n", binder_name);
19624 }
19625 #endif /* TARGET_MACHO */
19626
19627 /* Order the registers for register allocator. */
19628
19629 void
19630 x86_order_regs_for_local_alloc (void)
19631 {
19632 int pos = 0;
19633 int i;
19634
19635 /* First allocate the local general purpose registers. */
19636 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19637 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19638 reg_alloc_order [pos++] = i;
19639
19640 /* Global general purpose registers. */
19641 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19642 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19643 reg_alloc_order [pos++] = i;
19644
19645 /* x87 registers come first in case we are doing FP math
19646 using them. */
19647 if (!TARGET_SSE_MATH)
19648 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19649 reg_alloc_order [pos++] = i;
19650
19651 /* SSE registers. */
19652 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19653 reg_alloc_order [pos++] = i;
19654 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19655 reg_alloc_order [pos++] = i;
19656
19657 /* Extended REX SSE registers. */
19658 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
19659 reg_alloc_order [pos++] = i;
19660
19661 /* Mask register. */
19662 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
19663 reg_alloc_order [pos++] = i;
19664
19665 /* x87 registers. */
19666 if (TARGET_SSE_MATH)
19667 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19668 reg_alloc_order [pos++] = i;
19669
19670 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19671 reg_alloc_order [pos++] = i;
19672
19673 /* Initialize the rest of the array, as we do not allocate some registers
19674 at all. */
19675 while (pos < FIRST_PSEUDO_REGISTER)
19676 reg_alloc_order [pos++] = 0;
19677 }
19678
19679 static bool
19680 ix86_ms_bitfield_layout_p (const_tree record_type)
19681 {
19682 return ((TARGET_MS_BITFIELD_LAYOUT
19683 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19684 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
19685 }
19686
19687 /* Returns an expression indicating where the this parameter is
19688 located on entry to the FUNCTION. */
19689
19690 static rtx
19691 x86_this_parameter (tree function)
19692 {
19693 tree type = TREE_TYPE (function);
19694 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
19695 int nregs;
19696
19697 if (TARGET_64BIT)
19698 {
19699 const int *parm_regs;
19700
19701 if (ix86_function_type_abi (type) == MS_ABI)
19702 parm_regs = x86_64_ms_abi_int_parameter_registers;
19703 else
19704 parm_regs = x86_64_int_parameter_registers;
19705 return gen_rtx_REG (Pmode, parm_regs[aggr]);
19706 }
19707
19708 nregs = ix86_function_regparm (type, function);
19709
19710 if (nregs > 0 && !stdarg_p (type))
19711 {
19712 int regno;
19713 unsigned int ccvt = ix86_get_callcvt (type);
19714
19715 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
19716 regno = aggr ? DX_REG : CX_REG;
19717 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
19718 {
19719 regno = CX_REG;
19720 if (aggr)
19721 return gen_rtx_MEM (SImode,
19722 plus_constant (Pmode, stack_pointer_rtx, 4));
19723 }
19724 else
19725 {
19726 regno = AX_REG;
19727 if (aggr)
19728 {
19729 regno = DX_REG;
19730 if (nregs == 1)
19731 return gen_rtx_MEM (SImode,
19732 plus_constant (Pmode,
19733 stack_pointer_rtx, 4));
19734 }
19735 }
19736 return gen_rtx_REG (SImode, regno);
19737 }
19738
19739 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
19740 aggr ? 8 : 4));
19741 }
19742
19743 /* Determine whether x86_output_mi_thunk can succeed. */
19744
19745 static bool
19746 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
19747 const_tree function)
19748 {
19749 /* 64-bit can handle anything. */
19750 if (TARGET_64BIT)
19751 return true;
19752
19753 /* For 32-bit, everything's fine if we have one free register. */
19754 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19755 return true;
19756
19757 /* Need a free register for vcall_offset. */
19758 if (vcall_offset)
19759 return false;
19760
19761 /* Need a free register for GOT references. */
19762 if (flag_pic && !targetm.binds_local_p (function))
19763 return false;
19764
19765 /* Otherwise ok. */
19766 return true;
19767 }
19768
19769 /* Output the assembler code for a thunk function. THUNK_DECL is the
19770 declaration for the thunk function itself, FUNCTION is the decl for
19771 the target function. DELTA is an immediate constant offset to be
19772 added to THIS. If VCALL_OFFSET is nonzero, the word at
19773 *(*this + vcall_offset) should be added to THIS. */
19774
19775 static void
19776 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
19777 HOST_WIDE_INT vcall_offset, tree function)
19778 {
19779 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
19780 rtx this_param = x86_this_parameter (function);
19781 rtx this_reg, tmp, fnaddr;
19782 unsigned int tmp_regno;
19783 rtx_insn *insn;
19784
19785 if (TARGET_64BIT)
19786 tmp_regno = R10_REG;
19787 else
19788 {
19789 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
19790 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
19791 tmp_regno = AX_REG;
19792 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
19793 tmp_regno = DX_REG;
19794 else
19795 tmp_regno = CX_REG;
19796 }
19797
19798 emit_note (NOTE_INSN_PROLOGUE_END);
19799
19800 /* If CET branch protection is enabled, insert an ENDBR instruction. */
19801 if ((flag_cf_protection & CF_BRANCH))
19802 emit_insn (gen_nop_endbr ());
19803
19804 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19805 pull it in now and let DELTA benefit. */
19806 if (REG_P (this_param))
19807 this_reg = this_param;
19808 else if (vcall_offset)
19809 {
19810 /* Put the this parameter into %eax. */
19811 this_reg = gen_rtx_REG (Pmode, AX_REG);
19812 emit_move_insn (this_reg, this_param);
19813 }
19814 else
19815 this_reg = NULL_RTX;
19816
19817 /* Adjust the this parameter by a fixed constant. */
19818 if (delta)
19819 {
19820 rtx delta_rtx = GEN_INT (delta);
19821 rtx delta_dst = this_reg ? this_reg : this_param;
19822
19823 if (TARGET_64BIT)
19824 {
19825 if (!x86_64_general_operand (delta_rtx, Pmode))
19826 {
19827 tmp = gen_rtx_REG (Pmode, tmp_regno);
19828 emit_move_insn (tmp, delta_rtx);
19829 delta_rtx = tmp;
19830 }
19831 }
19832
19833 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
19834 }
19835
19836 /* Adjust the this parameter by a value stored in the vtable. */
19837 if (vcall_offset)
19838 {
19839 rtx vcall_addr, vcall_mem, this_mem;
19840
19841 tmp = gen_rtx_REG (Pmode, tmp_regno);
19842
19843 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
19844 if (Pmode != ptr_mode)
19845 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
19846 emit_move_insn (tmp, this_mem);
19847
19848 /* Adjust the this parameter. */
19849 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
19850 if (TARGET_64BIT
19851 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
19852 {
19853 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
19854 emit_move_insn (tmp2, GEN_INT (vcall_offset));
19855 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
19856 }
19857
19858 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
19859 if (Pmode != ptr_mode)
19860 emit_insn (gen_addsi_1_zext (this_reg,
19861 gen_rtx_REG (ptr_mode,
19862 REGNO (this_reg)),
19863 vcall_mem));
19864 else
19865 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
19866 }
19867
19868 /* If necessary, drop THIS back to its stack slot. */
19869 if (this_reg && this_reg != this_param)
19870 emit_move_insn (this_param, this_reg);
19871
19872 fnaddr = XEXP (DECL_RTL (function), 0);
19873 if (TARGET_64BIT)
19874 {
19875 if (!flag_pic || targetm.binds_local_p (function)
19876 || TARGET_PECOFF)
19877 ;
19878 else
19879 {
19880 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
19881 tmp = gen_rtx_CONST (Pmode, tmp);
19882 fnaddr = gen_const_mem (Pmode, tmp);
19883 }
19884 }
19885 else
19886 {
19887 if (!flag_pic || targetm.binds_local_p (function))
19888 ;
19889 #if TARGET_MACHO
19890 else if (TARGET_MACHO)
19891 {
19892 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
19893 fnaddr = XEXP (fnaddr, 0);
19894 }
19895 #endif /* TARGET_MACHO */
19896 else
19897 {
19898 tmp = gen_rtx_REG (Pmode, CX_REG);
19899 output_set_got (tmp, NULL_RTX);
19900
19901 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
19902 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
19903 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
19904 fnaddr = gen_const_mem (Pmode, fnaddr);
19905 }
19906 }
19907
19908 /* Our sibling call patterns do not allow memories, because we have no
19909 predicate that can distinguish between frame and non-frame memory.
19910 For our purposes here, we can get away with (ab)using a jump pattern,
19911 because we're going to do no optimization. */
19912 if (MEM_P (fnaddr))
19913 {
19914 if (sibcall_insn_operand (fnaddr, word_mode))
19915 {
19916 fnaddr = XEXP (DECL_RTL (function), 0);
19917 tmp = gen_rtx_MEM (QImode, fnaddr);
19918 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
19919 tmp = emit_call_insn (tmp);
19920 SIBLING_CALL_P (tmp) = 1;
19921 }
19922 else
19923 emit_jump_insn (gen_indirect_jump (fnaddr));
19924 }
19925 else
19926 {
19927 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
19928 {
19929 // CM_LARGE_PIC always uses pseudo PIC register which is
19930 // uninitialized. Since FUNCTION is local and calling it
19931 // doesn't go through PLT, we use scratch register %r11 as
19932 // PIC register and initialize it here.
19933 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
19934 ix86_init_large_pic_reg (tmp_regno);
19935 fnaddr = legitimize_pic_address (fnaddr,
19936 gen_rtx_REG (Pmode, tmp_regno));
19937 }
19938
19939 if (!sibcall_insn_operand (fnaddr, word_mode))
19940 {
19941 tmp = gen_rtx_REG (word_mode, tmp_regno);
19942 if (GET_MODE (fnaddr) != word_mode)
19943 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
19944 emit_move_insn (tmp, fnaddr);
19945 fnaddr = tmp;
19946 }
19947
19948 tmp = gen_rtx_MEM (QImode, fnaddr);
19949 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
19950 tmp = emit_call_insn (tmp);
19951 SIBLING_CALL_P (tmp) = 1;
19952 }
19953 emit_barrier ();
19954
19955 /* Emit just enough of rest_of_compilation to get the insns emitted. */
19956 insn = get_insns ();
19957 shorten_branches (insn);
19958 assemble_start_function (thunk_fndecl, fnname);
19959 final_start_function (insn, file, 1);
19960 final (insn, file, 1);
19961 final_end_function ();
19962 assemble_end_function (thunk_fndecl, fnname);
19963 }
19964
19965 static void
19966 x86_file_start (void)
19967 {
19968 default_file_start ();
19969 if (TARGET_16BIT)
19970 fputs ("\t.code16gcc\n", asm_out_file);
19971 #if TARGET_MACHO
19972 darwin_file_start ();
19973 #endif
19974 if (X86_FILE_START_VERSION_DIRECTIVE)
19975 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19976 if (X86_FILE_START_FLTUSED)
19977 fputs ("\t.global\t__fltused\n", asm_out_file);
19978 if (ix86_asm_dialect == ASM_INTEL)
19979 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
19980 }
19981
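/* Cap the alignment of over-word scalar fields on ia32: unless
   -malign-double (TARGET_ALIGN_DOUBLE) or the IAMCU rules apply, DFmode,
   DCmode and integer-class fields are limited to 32-bit alignment, so e.g.
   a double member of a struct is only 4-byte aligned by default.  */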
19982 int
19983 x86_field_alignment (tree type, int computed)
19984 {
19985 machine_mode mode;
19986
19987 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19988 return computed;
19989 if (TARGET_IAMCU)
19990 return iamcu_alignment (type, computed);
19991 mode = TYPE_MODE (strip_array_types (type));
19992 if (mode == DFmode || mode == DCmode
19993 || GET_MODE_CLASS (mode) == MODE_INT
19994 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19995 return MIN (32, computed);
19996 return computed;
19997 }
19998
19999 /* Print call to TARGET to FILE. */
20000
20001 static void
20002 x86_print_call_or_nop (FILE *file, const char *target)
20003 {
20004 if (flag_nop_mcount || !strcmp (target, "nop"))
20005 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20006 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20007 else
20008 fprintf (file, "1:\tcall\t%s\n", target);
20009 }
20010
20011 static bool
20012 current_fentry_name (const char **name)
20013 {
20014 tree attr = lookup_attribute ("fentry_name",
20015 DECL_ATTRIBUTES (current_function_decl));
20016 if (!attr)
20017 return false;
20018 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20019 return true;
20020 }
20021
20022 static bool
20023 current_fentry_section (const char **name)
20024 {
20025 tree attr = lookup_attribute ("fentry_section",
20026 DECL_ATTRIBUTES (current_function_decl));
20027 if (!attr)
20028 return false;
20029 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20030 return true;
20031 }
20032
20033 /* Output assembler code to FILE to increment profiler label # LABELNO
20034 for profiling a function entry. */
20035 void
20036 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20037 {
20038 if (cfun->machine->endbr_queued_at_entrance)
20039 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
20040
20041 const char *mcount_name = MCOUNT_NAME;
20042
20043 if (current_fentry_name (&mcount_name))
20044 ;
20045 else if (fentry_name)
20046 mcount_name = fentry_name;
20047 else if (flag_fentry)
20048 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
20049
20050 if (TARGET_64BIT)
20051 {
20052 #ifndef NO_PROFILE_COUNTERS
20053 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20054 #endif
20055
20056 if (!TARGET_PECOFF && flag_pic)
20057 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
20058 else
20059 x86_print_call_or_nop (file, mcount_name);
20060 }
20061 else if (flag_pic)
20062 {
20063 #ifndef NO_PROFILE_COUNTERS
20064 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
20065 LPREFIX, labelno);
20066 #endif
20067 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
20068 }
20069 else
20070 {
20071 #ifndef NO_PROFILE_COUNTERS
20072 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
20073 LPREFIX, labelno);
20074 #endif
20075 x86_print_call_or_nop (file, mcount_name);
20076 }
20077
20078 if (flag_record_mcount
20079 || lookup_attribute ("fentry_section",
20080 DECL_ATTRIBUTES (current_function_decl)))
20081 {
20082 const char *sname = "__mcount_loc";
20083
20084 if (current_fentry_section (&sname))
20085 ;
20086 else if (fentry_section)
20087 sname = fentry_section;
20088
20089 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
20090 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
20091 fprintf (file, "\t.previous\n");
20092 }
20093 }
20094
20095 /* We don't have exact information about the insn sizes, but we may assume
20096 quite safely that we are informed about all 1 byte insns and memory
20097 address sizes. This is enough to eliminate unnecessary padding in
20098 99% of cases. */
20099
20100 int
20101 ix86_min_insn_size (rtx_insn *insn)
20102 {
20103 int l = 0, len;
20104
20105 if (!INSN_P (insn) || !active_insn_p (insn))
20106 return 0;
20107
20108 /* Discard alignments we've emitted and jump instructions. */
20109 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20110 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20111 return 0;
20112
20113 /* Important case - calls are always 5 bytes.
20114 It is common to have many calls in a row. */
20115 if (CALL_P (insn)
20116 && symbolic_reference_mentioned_p (PATTERN (insn))
20117 && !SIBLING_CALL_P (insn))
20118 return 5;
20119 len = get_attr_length (insn);
20120 if (len <= 1)
20121 return 1;
20122
20123 /* For normal instructions we rely on get_attr_length being exact,
20124 with a few exceptions. */
20125 if (!JUMP_P (insn))
20126 {
20127 enum attr_type type = get_attr_type (insn);
20128
20129 switch (type)
20130 {
20131 case TYPE_MULTI:
20132 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
20133 || asm_noperands (PATTERN (insn)) >= 0)
20134 return 0;
20135 break;
20136 case TYPE_OTHER:
20137 case TYPE_FCMP:
20138 break;
20139 default:
20140 /* Otherwise trust get_attr_length. */
20141 return len;
20142 }
20143
20144 l = get_attr_length_address (insn);
20145 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20146 l = 4;
20147 }
20148 if (l)
20149 return 1+l;
20150 else
20151 return 2;
20152 }
20153
20154 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20155
20156 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20157 window. */
20158
20159 static void
20160 ix86_avoid_jump_mispredicts (void)
20161 {
20162 rtx_insn *insn, *start = get_insns ();
20163 int nbytes = 0, njumps = 0;
20164 bool isjump = false;
20165
20166 /* Look for all minimal intervals of instructions containing 4 jumps.
20167 The intervals are bounded by START and INSN. NBYTES is the total
20168 size of instructions in the interval including INSN and not including
20169 START. When the NBYTES is smaller than 16 bytes, it is possible
20170 that the end of START and INSN ends up in the same 16byte page.
20171
20172 The smallest offset in the page INSN can start is the case where START
20173 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20174 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
20175
20176 Don't consider asm goto as jump, while it can contain a jump, it doesn't
20177 have to, control transfer to label(s) can be performed through other
20178 means, and also we estimate minimum length of all asm stmts as 0. */
20179 for (insn = start; insn; insn = NEXT_INSN (insn))
20180 {
20181 int min_size;
20182
20183 if (LABEL_P (insn))
20184 {
20185 align_flags alignment = label_to_alignment (insn);
20186 int align = alignment.levels[0].log;
20187 int max_skip = alignment.levels[0].maxskip;
20188
20189 if (max_skip > 15)
20190 max_skip = 15;
20191 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20192 already in the current 16 byte page, because otherwise
20193 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20194 bytes to reach 16 byte boundary. */
20195 if (align <= 0
20196 || (align <= 3 && max_skip != (1 << align) - 1))
20197 max_skip = 0;
20198 if (dump_file)
20199 fprintf (dump_file, "Label %i with max_skip %i\n",
20200 INSN_UID (insn), max_skip);
20201 if (max_skip)
20202 {
20203 while (nbytes + max_skip >= 16)
20204 {
20205 start = NEXT_INSN (start);
20206 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20207 || CALL_P (start))
20208 njumps--, isjump = true;
20209 else
20210 isjump = false;
20211 nbytes -= ix86_min_insn_size (start);
20212 }
20213 }
20214 continue;
20215 }
20216
20217 min_size = ix86_min_insn_size (insn);
20218 nbytes += min_size;
20219 if (dump_file)
20220 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
20221 INSN_UID (insn), min_size);
20222 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
20223 || CALL_P (insn))
20224 njumps++;
20225 else
20226 continue;
20227
20228 while (njumps > 3)
20229 {
20230 start = NEXT_INSN (start);
20231 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20232 || CALL_P (start))
20233 njumps--, isjump = true;
20234 else
20235 isjump = false;
20236 nbytes -= ix86_min_insn_size (start);
20237 }
20238 gcc_assert (njumps >= 0);
20239 if (dump_file)
20240 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20241 INSN_UID (start), INSN_UID (insn), nbytes);
20242
20243 if (njumps == 3 && isjump && nbytes < 16)
20244 {
20245 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
20246
20247 if (dump_file)
20248 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20249 INSN_UID (insn), padsize);
20250 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
20251 }
20252 }
20253 }
20254 #endif
20255
20256 /* AMD Athlon works faster
20257 when RET is not the destination of a conditional jump or directly preceded
20258 by another jump instruction. We avoid the penalty by inserting a NOP just
20259 before the RET instruction in such cases. */
20260 static void
20261 ix86_pad_returns (void)
20262 {
20263 edge e;
20264 edge_iterator ei;
20265
20266 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20267 {
20268 basic_block bb = e->src;
20269 rtx_insn *ret = BB_END (bb);
20270 rtx_insn *prev;
20271 bool replace = false;
20272
20273 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
20274 || optimize_bb_for_size_p (bb))
20275 continue;
20276 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20277 if (active_insn_p (prev) || LABEL_P (prev))
20278 break;
20279 if (prev && LABEL_P (prev))
20280 {
20281 edge e;
20282 edge_iterator ei;
20283
20284 FOR_EACH_EDGE (e, ei, bb->preds)
20285 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20286 && !(e->flags & EDGE_FALLTHRU))
20287 {
20288 replace = true;
20289 break;
20290 }
20291 }
20292 if (!replace)
20293 {
20294 prev = prev_active_insn (ret);
20295 if (prev
20296 && ((JUMP_P (prev) && any_condjump_p (prev))
20297 || CALL_P (prev)))
20298 replace = true;
20299 /* Empty functions get a branch mispredict even when
20300 the jump destination is not visible to us. */
20301 if (!prev && !optimize_function_for_size_p (cfun))
20302 replace = true;
20303 }
20304 if (replace)
20305 {
20306 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
20307 delete_insn (ret);
20308 }
20309 }
20310 }
20311
20312 /* Count the minimum number of instructions in BB. Return 4 if the
20313 number of instructions >= 4. */
20314
20315 static int
20316 ix86_count_insn_bb (basic_block bb)
20317 {
20318 rtx_insn *insn;
20319 int insn_count = 0;
20320
20321 /* Count number of instructions in this block. Return 4 if the number
20322 of instructions >= 4. */
20323 FOR_BB_INSNS (bb, insn)
20324 {
20325 /* Only happens in exit blocks. */
20326 if (JUMP_P (insn)
20327 && ANY_RETURN_P (PATTERN (insn)))
20328 break;
20329
20330 if (NONDEBUG_INSN_P (insn)
20331 && GET_CODE (PATTERN (insn)) != USE
20332 && GET_CODE (PATTERN (insn)) != CLOBBER)
20333 {
20334 insn_count++;
20335 if (insn_count >= 4)
20336 return insn_count;
20337 }
20338 }
20339
20340 return insn_count;
20341 }
20342
20343
20344 /* Count the minimum number of instructions in code path in BB.
20345 Return 4 if the number of instructions >= 4. */
20346
20347 static int
20348 ix86_count_insn (basic_block bb)
20349 {
20350 edge e;
20351 edge_iterator ei;
20352 int min_prev_count;
20353
20354 /* Only bother counting instructions along paths with no
20355 more than 2 basic blocks between entry and exit. Given
20356 that BB has an edge to exit, determine if a predecessor
20357 of BB has an edge from entry. If so, compute the number
20358 of instructions in the predecessor block. If there
20359 happen to be multiple such blocks, compute the minimum. */
20360 min_prev_count = 4;
20361 FOR_EACH_EDGE (e, ei, bb->preds)
20362 {
20363 edge prev_e;
20364 edge_iterator prev_ei;
20365
20366 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20367 {
20368 min_prev_count = 0;
20369 break;
20370 }
20371 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
20372 {
20373 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20374 {
20375 int count = ix86_count_insn_bb (e->src);
20376 if (count < min_prev_count)
20377 min_prev_count = count;
20378 break;
20379 }
20380 }
20381 }
20382
20383 if (min_prev_count < 4)
20384 min_prev_count += ix86_count_insn_bb (bb);
20385
20386 return min_prev_count;
20387 }
20388
20389 /* Pad short function to 4 instructions. */
20390
20391 static void
20392 ix86_pad_short_function (void)
20393 {
20394 edge e;
20395 edge_iterator ei;
20396
20397 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20398 {
20399 rtx_insn *ret = BB_END (e->src);
20400 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
20401 {
20402 int insn_count = ix86_count_insn (e->src);
20403
20404 /* Pad short function. */
20405 if (insn_count < 4)
20406 {
20407 rtx_insn *insn = ret;
20408
20409 /* Find epilogue. */
20410 while (insn
20411 && (!NOTE_P (insn)
20412 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
20413 insn = PREV_INSN (insn);
20414
20415 if (!insn)
20416 insn = ret;
20417
20418 /* Two NOPs count as one instruction. */
20419 insn_count = 2 * (4 - insn_count);
20420 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
20421 }
20422 }
20423 }
20424 }
20425
20426 /* Fix up a Windows system unwinder issue. If an EH region falls through into
20427 the epilogue, the Windows system unwinder will apply epilogue logic and
20428 produce incorrect offsets. This can be avoided by adding a nop between
20429 the last insn that can throw and the first insn of the epilogue. */
20430
20431 static void
20432 ix86_seh_fixup_eh_fallthru (void)
20433 {
20434 edge e;
20435 edge_iterator ei;
20436
20437 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20438 {
20439 rtx_insn *insn, *next;
20440
20441 /* Find the beginning of the epilogue. */
20442 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
20443 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
20444 break;
20445 if (insn == NULL)
20446 continue;
20447
20448 /* We only care about preceding insns that can throw. */
20449 insn = prev_active_insn (insn);
20450 if (insn == NULL || !can_throw_internal (insn))
20451 continue;
20452
20453 /* Do not separate calls from their debug information. */
20454 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
20455 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
20456 insn = next;
20457 else
20458 break;
20459
20460 emit_insn_after (gen_nops (const1_rtx), insn);
20461 }
20462 }
20463
20464 /* Implement machine specific optimizations. We implement padding of returns
20465 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20466 static void
20467 ix86_reorg (void)
20468 {
20469 /* We are freeing block_for_insn in the toplev to keep compatibility
20470 with old MDEP_REORGS that are not CFG based. Recompute it now. */
20471 compute_bb_for_insn ();
20472
20473 if (TARGET_SEH && current_function_has_exception_handlers ())
20474 ix86_seh_fixup_eh_fallthru ();
20475
20476 if (optimize && optimize_function_for_speed_p (cfun))
20477 {
20478 if (TARGET_PAD_SHORT_FUNCTION)
20479 ix86_pad_short_function ();
20480 else if (TARGET_PAD_RETURNS)
20481 ix86_pad_returns ();
20482 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20483 if (TARGET_FOUR_JUMP_LIMIT)
20484 ix86_avoid_jump_mispredicts ();
20485 #endif
20486 }
20487 }
20488
20489 /* Return nonzero when QImode register that must be represented via REX prefix
20490 is used. */
20491 bool
20492 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
20493 {
20494 int i;
20495 extract_insn_cached (insn);
20496 for (i = 0; i < recog_data.n_operands; i++)
20497 if (GENERAL_REG_P (recog_data.operand[i])
20498 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
20499 return true;
20500 return false;
20501 }
20502
20503 /* Return true when INSN mentions register that must be encoded using REX
20504 prefix. */
20505 bool
20506 x86_extended_reg_mentioned_p (rtx insn)
20507 {
20508 subrtx_iterator::array_type array;
20509 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
20510 {
20511 const_rtx x = *iter;
20512 if (REG_P (x)
20513 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
20514 return true;
20515 }
20516 return false;
20517 }
20518
20519 /* If profitable, negate (without causing overflow) integer constant
20520 of mode MODE at location LOC. Return true in this case. */
20521 bool
20522 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
20523 {
20524 HOST_WIDE_INT val;
20525
20526 if (!CONST_INT_P (*loc))
20527 return false;
20528
20529 switch (mode)
20530 {
20531 case E_DImode:
20532 /* DImode x86_64 constants must fit in 32 bits. */
20533 gcc_assert (x86_64_immediate_operand (*loc, mode));
20534
20535 mode = SImode;
20536 break;
20537
20538 case E_SImode:
20539 case E_HImode:
20540 case E_QImode:
20541 break;
20542
20543 default:
20544 gcc_unreachable ();
20545 }
20546
20547 /* Avoid overflows. */
20548 if (mode_signbit_p (mode, *loc))
20549 return false;
20550
20551 val = INTVAL (*loc);
20552
20553 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
20554    Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
20555 if ((val < 0 && val != -128)
20556 || val == 128)
20557 {
20558 *loc = GEN_INT (-val);
20559 return true;
20560 }
20561
20562 return false;
20563 }
20564
20565 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20566 optabs would emit if we didn't have TFmode patterns. */
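/* The emitted sequence corresponds roughly to this sketch, where X is
   the unsigned input and only a signed integer-to-float conversion is
   available: convert directly when the sign bit is clear, otherwise
   halve X with a sticky low bit, convert, and double the result:

     if ((signed) x >= 0)
       result = (FLOAT) (signed) x;
     else
       result = 2 * (FLOAT) (signed) ((x >> 1) | (x & 1));  */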
20567
20568 void
20569 x86_emit_floatuns (rtx operands[2])
20570 {
20571 rtx_code_label *neglab, *donelab;
20572 rtx i0, i1, f0, in, out;
20573 machine_mode mode, inmode;
20574
20575 inmode = GET_MODE (operands[1]);
20576 gcc_assert (inmode == SImode || inmode == DImode);
20577
20578 out = operands[0];
20579 in = force_reg (inmode, operands[1]);
20580 mode = GET_MODE (out);
20581 neglab = gen_label_rtx ();
20582 donelab = gen_label_rtx ();
20583 f0 = gen_reg_rtx (mode);
20584
20585 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20586
20587 expand_float (out, in, 0);
20588
20589 emit_jump_insn (gen_jump (donelab));
20590 emit_barrier ();
20591
20592 emit_label (neglab);
20593
20594 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20595 1, OPTAB_DIRECT);
20596 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20597 1, OPTAB_DIRECT);
20598 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20599
20600 expand_float (f0, i0, 0);
20601
20602 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
20603
20604 emit_label (donelab);
20605 }
20606 \f
20607 /* Target hook for scalar_mode_supported_p. */
20608 static bool
20609 ix86_scalar_mode_supported_p (scalar_mode mode)
20610 {
20611 if (DECIMAL_FLOAT_MODE_P (mode))
20612 return default_decimal_float_supported_p ();
20613 else if (mode == TFmode)
20614 return true;
20615 else
20616 return default_scalar_mode_supported_p (mode);
20617 }
20618
20619 /* Implements target hook vector_mode_supported_p. */
20620 static bool
20621 ix86_vector_mode_supported_p (machine_mode mode)
20622 {
20623 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20624 return true;
20625 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20626 return true;
20627 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20628 return true;
20629 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20630 return true;
20631 if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
20632 return true;
20633 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20634 return true;
20635 return false;
20636 }
20637
20638 /* Target hook for c_mode_for_suffix. */
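/* This is what lets constants such as 1.0q (__float128, TFmode) and
   1.0w (__float80, XFmode) be accepted by the C-family front ends on
   x86, per the GCC extensions for these suffixes.  */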
20639 static machine_mode
20640 ix86_c_mode_for_suffix (char suffix)
20641 {
20642 if (suffix == 'q')
20643 return TFmode;
20644 if (suffix == 'w')
20645 return XFmode;
20646
20647 return VOIDmode;
20648 }
20649
20650 /* Worker function for TARGET_MD_ASM_ADJUST.
20651
20652 We implement asm flag outputs, and maintain source compatibility
20653 with the old cc0-based compiler. */
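/* For illustration, a flag output constraint in user code looks
   roughly like

     asm ("btl %2, %1" : "=@ccc" (carry) : "r" (word), "r" (bit));

   which makes CARRY reflect the carry flag after the asm; prefixing
   the condition with 'n', as in "=@ccnc", requests the inverted
   condition instead.  (Illustrative sketch; CARRY, WORD and BIT are
   hypothetical variables.)  */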
20654
20655 static rtx_insn *
20656 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
20657 vec<const char *> &constraints,
20658 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
20659 {
20660 bool saw_asm_flag = false;
20661
20662 start_sequence ();
20663 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
20664 {
20665 const char *con = constraints[i];
20666 if (strncmp (con, "=@cc", 4) != 0)
20667 continue;
20668 con += 4;
20669 if (strchr (con, ',') != NULL)
20670 {
20671 error ("alternatives not allowed in %<asm%> flag output");
20672 continue;
20673 }
20674
20675 bool invert = false;
20676 if (con[0] == 'n')
20677 invert = true, con++;
20678
20679 machine_mode mode = CCmode;
20680 rtx_code code = UNKNOWN;
20681
20682 switch (con[0])
20683 {
20684 case 'a':
20685 if (con[1] == 0)
20686 mode = CCAmode, code = EQ;
20687 else if (con[1] == 'e' && con[2] == 0)
20688 mode = CCCmode, code = NE;
20689 break;
20690 case 'b':
20691 if (con[1] == 0)
20692 mode = CCCmode, code = EQ;
20693 else if (con[1] == 'e' && con[2] == 0)
20694 mode = CCAmode, code = NE;
20695 break;
20696 case 'c':
20697 if (con[1] == 0)
20698 mode = CCCmode, code = EQ;
20699 break;
20700 case 'e':
20701 if (con[1] == 0)
20702 mode = CCZmode, code = EQ;
20703 break;
20704 case 'g':
20705 if (con[1] == 0)
20706 mode = CCGCmode, code = GT;
20707 else if (con[1] == 'e' && con[2] == 0)
20708 mode = CCGCmode, code = GE;
20709 break;
20710 case 'l':
20711 if (con[1] == 0)
20712 mode = CCGCmode, code = LT;
20713 else if (con[1] == 'e' && con[2] == 0)
20714 mode = CCGCmode, code = LE;
20715 break;
20716 case 'o':
20717 if (con[1] == 0)
20718 mode = CCOmode, code = EQ;
20719 break;
20720 case 'p':
20721 if (con[1] == 0)
20722 mode = CCPmode, code = EQ;
20723 break;
20724 case 's':
20725 if (con[1] == 0)
20726 mode = CCSmode, code = EQ;
20727 break;
20728 case 'z':
20729 if (con[1] == 0)
20730 mode = CCZmode, code = EQ;
20731 break;
20732 }
20733 if (code == UNKNOWN)
20734 {
20735 error ("unknown %<asm%> flag output %qs", constraints[i]);
20736 continue;
20737 }
20738 if (invert)
20739 code = reverse_condition (code);
20740
20741 rtx dest = outputs[i];
20742 if (!saw_asm_flag)
20743 {
20744 /* This is the first asm flag output. Here we put the flags
20745 register in as the real output and adjust the condition to
20746 allow it. */
20747 constraints[i] = "=Bf";
20748 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
20749 saw_asm_flag = true;
20750 }
20751 else
20752 {
20753 /* We don't need the flags register as output twice. */
20754 constraints[i] = "=X";
20755 outputs[i] = gen_rtx_SCRATCH (SImode);
20756 }
20757
20758 rtx x = gen_rtx_REG (mode, FLAGS_REG);
20759 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
20760
20761 machine_mode dest_mode = GET_MODE (dest);
20762 if (!SCALAR_INT_MODE_P (dest_mode))
20763 {
20764 error ("invalid type for %<asm%> flag output");
20765 continue;
20766 }
20767
20768 if (dest_mode == DImode && !TARGET_64BIT)
20769 dest_mode = SImode;
20770
20771 if (dest_mode != QImode)
20772 {
20773 rtx destqi = gen_reg_rtx (QImode);
20774 emit_insn (gen_rtx_SET (destqi, x));
20775
20776 if (TARGET_ZERO_EXTEND_WITH_AND
20777 && optimize_function_for_speed_p (cfun))
20778 {
20779 x = force_reg (dest_mode, const0_rtx);
20780
20781 emit_insn (gen_movstrictqi
20782 (gen_lowpart (QImode, x), destqi));
20783 }
20784 else
20785 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
20786 }
20787
20788 if (dest_mode != GET_MODE (dest))
20789 {
20790 rtx tmp = gen_reg_rtx (SImode);
20791
20792 emit_insn (gen_rtx_SET (tmp, x));
20793 emit_insn (gen_zero_extendsidi2 (dest, tmp));
20794 }
20795 else
20796 emit_insn (gen_rtx_SET (dest, x));
20797 }
20798 rtx_insn *seq = get_insns ();
20799 end_sequence ();
20800
20801 if (saw_asm_flag)
20802 return seq;
20803 else
20804 {
20805 /* If we had no asm flag outputs, clobber the flags. */
20806 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
20807 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
20808 return NULL;
20809 }
20810 }
20811
20812 /* Implement the target hook targetm.asm.encode_section_info. */
20813
20814 static void ATTRIBUTE_UNUSED
20815 ix86_encode_section_info (tree decl, rtx rtl, int first)
20816 {
20817 default_encode_section_info (decl, rtl, first);
20818
20819 if (ix86_in_large_data_p (decl))
20820 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20821 }
20822
20823 /* Worker function for REVERSE_CONDITION. */
20824
20825 enum rtx_code
20826 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
20827 {
20828 return (mode == CCFPmode
20829 ? reverse_condition_maybe_unordered (code)
20830 : reverse_condition (code));
20831 }
20832
20833 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20834 to OPERANDS[0]. */
20835
20836 const char *
20837 output_387_reg_move (rtx_insn *insn, rtx *operands)
20838 {
20839 if (REG_P (operands[0]))
20840 {
20841 if (REG_P (operands[1])
20842 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20843 {
20844 if (REGNO (operands[0]) == FIRST_STACK_REG)
20845 return output_387_ffreep (operands, 0);
20846 return "fstp\t%y0";
20847 }
20848 if (STACK_TOP_P (operands[0]))
20849 return "fld%Z1\t%y1";
20850 return "fst\t%y0";
20851 }
20852 else if (MEM_P (operands[0]))
20853 {
20854 gcc_assert (REG_P (operands[1]));
20855 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20856 return "fstp%Z0\t%y0";
20857 else
20858 {
20859 /* There is no non-popping store to memory for XFmode.
20860 So if we need one, follow the store with a load. */
20861 if (GET_MODE (operands[0]) == XFmode)
20862 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
20863 else
20864 return "fst%Z0\t%y0";
20865 }
20866 }
20867 else
20868 gcc_unreachable();
20869 }
20870 #ifdef TARGET_SOLARIS
20871 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20872
20873 static void
20874 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20875 tree decl)
20876 {
20877 /* With Binutils 2.15, the "@unwind" marker must be specified on
20878 every occurrence of the ".eh_frame" section, not just the first
20879 one. */
20880 if (TARGET_64BIT
20881 && strcmp (name, ".eh_frame") == 0)
20882 {
20883 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20884 flags & SECTION_WRITE ? "aw" : "a");
20885 return;
20886 }
20887
20888 #ifndef USE_GAS
20889 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
20890 {
20891 solaris_elf_asm_comdat_section (name, flags, decl);
20892 return;
20893 }
20894
20895 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
20896 SPARC assembler. One cannot mix single-letter flags and #exclude, so
20897 only emit the latter here. */
20898 if (flags & SECTION_EXCLUDE)
20899 {
20900 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
20901 return;
20902 }
20903 #endif
20904
20905 default_elf_asm_named_section (name, flags, decl);
20906 }
20907 #endif /* TARGET_SOLARIS */
20908
20909 /* Return the mangling of TYPE if it is an extended fundamental type. */
20910
20911 static const char *
20912 ix86_mangle_type (const_tree type)
20913 {
20914 type = TYPE_MAIN_VARIANT (type);
20915
20916 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20917 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
20918 return NULL;
20919
20920 switch (TYPE_MODE (type))
20921 {
20922 case E_TFmode:
20923 /* __float128 is "g". */
20924 return "g";
20925 case E_XFmode:
20926 /* "long double" or __float80 is "e". */
20927 return "e";
20928 default:
20929 return NULL;
20930 }
20931 }
20932
20933 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
20934
20935 static tree
20936 ix86_stack_protect_guard (void)
20937 {
20938 if (TARGET_SSP_TLS_GUARD)
20939 {
20940 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
20941 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
20942 tree type = build_qualified_type (type_node, qual);
20943 tree t;
20944
20945 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
20946 {
20947 t = ix86_tls_stack_chk_guard_decl;
20948
20949 if (t == NULL)
20950 {
20951 rtx x;
20952
20953 t = build_decl
20954 (UNKNOWN_LOCATION, VAR_DECL,
20955 get_identifier (ix86_stack_protector_guard_symbol_str),
20956 type);
20957 TREE_STATIC (t) = 1;
20958 TREE_PUBLIC (t) = 1;
20959 DECL_EXTERNAL (t) = 1;
20960 TREE_USED (t) = 1;
20961 TREE_THIS_VOLATILE (t) = 1;
20962 DECL_ARTIFICIAL (t) = 1;
20963 DECL_IGNORED_P (t) = 1;
20964
20965 /* Do not share RTL as the declaration is visible outside of
20966 the current function. */
20967 x = DECL_RTL (t);
20968 RTX_FLAG (x, used) = 1;
20969
20970 ix86_tls_stack_chk_guard_decl = t;
20971 }
20972 }
20973 else
20974 {
20975 tree asptrtype = build_pointer_type (type);
20976
20977 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
20978 t = build2 (MEM_REF, asptrtype, t,
20979 build_int_cst (asptrtype, 0));
20980 TREE_THIS_VOLATILE (t) = 1;
20981 }
20982
20983 return t;
20984 }
20985
20986 return default_stack_protect_guard ();
20987 }
20988
20989 /* For 32-bit code we can save the PIC register setup by using the
20990    hidden function __stack_chk_fail_local instead of calling
20991    __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20992    register, so it is better to call __stack_chk_fail directly. */
20993
20994 static tree ATTRIBUTE_UNUSED
20995 ix86_stack_protect_fail (void)
20996 {
20997 return TARGET_64BIT
20998 ? default_external_stack_protect_fail ()
20999 : default_hidden_stack_protect_fail ();
21000 }
21001
21002 /* Select a format to encode pointers in exception handling data. CODE
21003 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21004 true if the symbol may be affected by dynamic relocations.
21005
21006 ??? All x86 object file formats are capable of representing this.
21007 After all, the relocation needed is the same as for the call insn.
21008 Whether or not a particular assembler allows us to enter such, I
21009 guess we'll have to see. */
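/* For example, with -fpic and the small PIC code model the result is
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, made indirect for global symbols;
   non-PIC small-model code simply uses DW_EH_PE_udata4.  */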
21010 int
21011 asm_preferred_eh_data_format (int code, int global)
21012 {
21013 if (flag_pic)
21014 {
21015 int type = DW_EH_PE_sdata8;
21016 if (!TARGET_64BIT
21017 || ix86_cmodel == CM_SMALL_PIC
21018 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21019 type = DW_EH_PE_sdata4;
21020 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21021 }
21022 if (ix86_cmodel == CM_SMALL
21023 || (ix86_cmodel == CM_MEDIUM && code))
21024 return DW_EH_PE_udata4;
21025 return DW_EH_PE_absptr;
21026 }
21027 \f
21028 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21029 static int
21030 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
21031 tree vectype, int)
21032 {
21033 bool fp = false;
21034 machine_mode mode = TImode;
21035 int index;
21036 if (vectype != NULL)
21037 {
21038 fp = FLOAT_TYPE_P (vectype);
21039 mode = TYPE_MODE (vectype);
21040 }
21041
21042 switch (type_of_cost)
21043 {
21044 case scalar_stmt:
21045 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
21046
21047 case scalar_load:
21048 /* Load/store costs are relative to a register move, which costs 2.
21049    Recompute them in COSTS_N_INSNS units so everything has the same base. */
21050 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
21051 : ix86_cost->int_load [2]) / 2;
21052
21053 case scalar_store:
21054 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
21055 : ix86_cost->int_store [2]) / 2;
21056
21057 case vector_stmt:
21058 return ix86_vec_cost (mode,
21059 fp ? ix86_cost->addss : ix86_cost->sse_op);
21060
21061 case vector_load:
21062 index = sse_store_index (mode);
21063 /* See PR82713 - we may end up being called on a non-vector type. */
21064 if (index < 0)
21065 index = 2;
21066 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
21067
21068 case vector_store:
21069 index = sse_store_index (mode);
21070 /* See PR82713 - we may end up being called on a non-vector type. */
21071 if (index < 0)
21072 index = 2;
21073 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
21074
21075 case vec_to_scalar:
21076 case scalar_to_vec:
21077 return ix86_vec_cost (mode, ix86_cost->sse_op);
21078
21079 /* We should have separate costs for unaligned loads and gather/scatter.
21080 Do that incrementally. */
21081 case unaligned_load:
21082 index = sse_store_index (mode);
21083 /* See PR82713 - we may end up being called on a non-vector type. */
21084 if (index < 0)
21085 index = 2;
21086 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
21087
21088 case unaligned_store:
21089 index = sse_store_index (mode);
21090 /* See PR82713 - we may end up being called on a non-vector type. */
21091 if (index < 0)
21092 index = 2;
21093 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
21094
21095 case vector_gather_load:
21096 return ix86_vec_cost (mode,
21097 COSTS_N_INSNS
21098 (ix86_cost->gather_static
21099 + ix86_cost->gather_per_elt
21100 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21101
21102 case vector_scatter_store:
21103 return ix86_vec_cost (mode,
21104 COSTS_N_INSNS
21105 (ix86_cost->scatter_static
21106 + ix86_cost->scatter_per_elt
21107 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21108
21109 case cond_branch_taken:
21110 return ix86_cost->cond_taken_branch_cost;
21111
21112 case cond_branch_not_taken:
21113 return ix86_cost->cond_not_taken_branch_cost;
21114
21115 case vec_perm:
21116 case vec_promote_demote:
21117 return ix86_vec_cost (mode, ix86_cost->sse_op);
21118
21119 case vec_construct:
21120 {
21121 /* N element inserts into SSE vectors. */
21122 int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
21123 /* One vinserti128 for combining two SSE vectors for AVX256. */
21124 if (GET_MODE_BITSIZE (mode) == 256)
21125 cost += ix86_vec_cost (mode, ix86_cost->addss);
21126 /* One vinserti64x4 and two vinserti128 for combining SSE
21127 and AVX256 vectors to AVX512. */
21128 else if (GET_MODE_BITSIZE (mode) == 512)
21129 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
21130 return cost;
21131 }
21132
21133 default:
21134 gcc_unreachable ();
21135 }
21136 }
21137
21138 \f
21139 /* This function returns the calling-ABI-specific va_list type node,
21140    i.e. the va_list type corresponding to FNDECL's ABI. */
21141
21142 static tree
21143 ix86_fn_abi_va_list (tree fndecl)
21144 {
21145 if (!TARGET_64BIT)
21146 return va_list_type_node;
21147 gcc_assert (fndecl != NULL_TREE);
21148
21149 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
21150 return ms_va_list_type_node;
21151 else
21152 return sysv_va_list_type_node;
21153 }
21154
21155 /* Returns the canonical va_list type specified by TYPE. If there
21156 is no valid TYPE provided, it returns NULL_TREE. */
21157
21158 static tree
21159 ix86_canonical_va_list_type (tree type)
21160 {
21161 if (TARGET_64BIT)
21162 {
21163 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
21164 return ms_va_list_type_node;
21165
21166 if ((TREE_CODE (type) == ARRAY_TYPE
21167 && integer_zerop (array_type_nelts (type)))
21168 || POINTER_TYPE_P (type))
21169 {
21170 tree elem_type = TREE_TYPE (type);
21171 if (TREE_CODE (elem_type) == RECORD_TYPE
21172 && lookup_attribute ("sysv_abi va_list",
21173 TYPE_ATTRIBUTES (elem_type)))
21174 return sysv_va_list_type_node;
21175 }
21176
21177 return NULL_TREE;
21178 }
21179
21180 return std_canonical_va_list_type (type);
21181 }
21182
21183 /* Iterate through the target-specific builtin types for va_list.
21184 IDX denotes the iterator, *PTREE is set to the result type of
21185 the va_list builtin, and *PNAME to its internal name.
21186 Returns zero if there is no element for this index, otherwise
21187 IDX should be increased upon the next call.
21188 Note, do not iterate a base builtin's name like __builtin_va_list.
21189 Used from c_common_nodes_and_builtins. */
21190
21191 static int
21192 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
21193 {
21194 if (TARGET_64BIT)
21195 {
21196 switch (idx)
21197 {
21198 default:
21199 break;
21200
21201 case 0:
21202 *ptree = ms_va_list_type_node;
21203 *pname = "__builtin_ms_va_list";
21204 return 1;
21205
21206 case 1:
21207 *ptree = sysv_va_list_type_node;
21208 *pname = "__builtin_sysv_va_list";
21209 return 1;
21210 }
21211 }
21212
21213 return 0;
21214 }
21215
21216 #undef TARGET_SCHED_DISPATCH
21217 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21218 #undef TARGET_SCHED_DISPATCH_DO
21219 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21220 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21221 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21222 #undef TARGET_SCHED_REORDER
21223 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21224 #undef TARGET_SCHED_ADJUST_PRIORITY
21225 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21226 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21227 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21228 ix86_dependencies_evaluation_hook
21229
21230
21231 /* Implementation of the reassociation_width target hook, used by the
21232    reassoc phase to identify the parallelism level in a reassociated
21233    tree. The statement's tree_code is passed in OP. The arguments'
21234    type is passed in MODE. */
21235
21236 static int
21237 ix86_reassociation_width (unsigned int op, machine_mode mode)
21238 {
21239 int width = 1;
21240 /* Vector part. */
21241 if (VECTOR_MODE_P (mode))
21242 {
21243 int div = 1;
21244 if (INTEGRAL_MODE_P (mode))
21245 width = ix86_cost->reassoc_vec_int;
21246 else if (FLOAT_MODE_P (mode))
21247 width = ix86_cost->reassoc_vec_fp;
21248
21249 if (width == 1)
21250 return 1;
21251
21252 /* Integer vector instructions execute in the FP unit and can
21253    execute 3 additions and one multiplication per cycle. */
21254 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2)
21255 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
21256 return 1;
21257
21258 /* Account for targets that split wide vectors into multiple parts. */
21259 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
21260 div = GET_MODE_BITSIZE (mode) / 128;
21261 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
21262 div = GET_MODE_BITSIZE (mode) / 64;
21263 width = (width + div - 1) / div;
21264 }
21265 /* Scalar part. */
21266 else if (INTEGRAL_MODE_P (mode))
21267 width = ix86_cost->reassoc_int;
21268 else if (FLOAT_MODE_P (mode))
21269 width = ix86_cost->reassoc_fp;
21270
21271 /* Avoid using too many registers in 32-bit mode. */
21272 if (!TARGET_64BIT && width > 2)
21273 width = 2;
21274 return width;
21275 }
21276
21277 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
21278 place emms and femms instructions. */
21279
21280 static machine_mode
21281 ix86_preferred_simd_mode (scalar_mode mode)
21282 {
21283 if (!TARGET_SSE)
21284 return word_mode;
21285
21286 switch (mode)
21287 {
21288 case E_QImode:
21289 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21290 return V64QImode;
21291 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21292 return V32QImode;
21293 else
21294 return V16QImode;
21295
21296 case E_HImode:
21297 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21298 return V32HImode;
21299 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21300 return V16HImode;
21301 else
21302 return V8HImode;
21303
21304 case E_SImode:
21305 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21306 return V16SImode;
21307 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21308 return V8SImode;
21309 else
21310 return V4SImode;
21311
21312 case E_DImode:
21313 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21314 return V8DImode;
21315 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21316 return V4DImode;
21317 else
21318 return V2DImode;
21319
21320 case E_SFmode:
21321 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21322 return V16SFmode;
21323 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21324 return V8SFmode;
21325 else
21326 return V4SFmode;
21327
21328 case E_DFmode:
21329 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21330 return V8DFmode;
21331 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21332 return V4DFmode;
21333 else if (TARGET_SSE2)
21334 return V2DFmode;
21335 /* FALLTHRU */
21336
21337 default:
21338 return word_mode;
21339 }
21340 }
21341
21342 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
21343 vectors. If AVX512F is enabled then try vectorizing with 512bit,
21344 256bit and 128bit vectors. */
21345
21346 static void
21347 ix86_autovectorize_vector_sizes (vector_sizes *sizes, bool all)
21348 {
21349 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21350 {
21351 sizes->safe_push (64);
21352 sizes->safe_push (32);
21353 sizes->safe_push (16);
21354 }
21355 else if (TARGET_AVX512F && all)
21356 {
21357 sizes->safe_push (32);
21358 sizes->safe_push (16);
21359 sizes->safe_push (64);
21360 }
21361 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21362 {
21363 sizes->safe_push (32);
21364 sizes->safe_push (16);
21365 }
21366 else if (TARGET_AVX && all)
21367 {
21368 sizes->safe_push (16);
21369 sizes->safe_push (32);
21370 }
21371 else if (TARGET_MMX_WITH_SSE)
21372 sizes->safe_push (16);
21373
21374 if (TARGET_MMX_WITH_SSE)
21375 sizes->safe_push (8);
21376 }
21377
21378 /* Implementation of targetm.vectorize.get_mask_mode. */
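/* For example, a 64-byte vector of 16 SImode elements under AVX512F
   gets a 16-bit scalar mask (HImode), while without AVX512 the mask is
   a vector of element-sized integers, e.g. V4SImode for V4SFmode.  */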
21379
21380 static opt_machine_mode
21381 ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
21382 {
21383 unsigned elem_size = vector_size / nunits;
21384
21385 /* Scalar mask case. */
21386 if ((TARGET_AVX512F && vector_size == 64)
21387 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
21388 {
21389 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
21390 return smallest_int_mode_for_size (nunits);
21391 }
21392
21393 scalar_int_mode elem_mode
21394 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
21395
21396 gcc_assert (elem_size * nunits == vector_size);
21397
21398 return mode_for_vector (elem_mode, nunits);
21399 }
21400
21401 \f
21402
21403 /* Return the class of registers which could be used for a pseudo of MODE
21404    and of class RCLASS for spilling instead of memory. Return NO_REGS
21405    if this is not possible or not profitable. */
21406
21407 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
21408
21409 static reg_class_t
21410 ix86_spill_class (reg_class_t rclass, machine_mode mode)
21411 {
21412 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
21413 && TARGET_SSE2
21414 && TARGET_INTER_UNIT_MOVES_TO_VEC
21415 && TARGET_INTER_UNIT_MOVES_FROM_VEC
21416 && (mode == SImode || (TARGET_64BIT && mode == DImode))
21417 && INTEGER_CLASS_P (rclass))
21418 return ALL_SSE_REGS;
21419 return NO_REGS;
21420 }
21421
21422 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
21423 but returns a lower bound. */
21424
21425 static unsigned int
21426 ix86_max_noce_ifcvt_seq_cost (edge e)
21427 {
21428 bool predictable_p = predictable_edge_p (e);
21429
21430 enum compiler_param param
21431 = (predictable_p
21432 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
21433 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
21434
21435 /* If we have a parameter set, use that, otherwise take a guess using
21436 BRANCH_COST. */
21437 if (global_options_set.x_param_values[param])
21438 return PARAM_VALUE (param);
21439 else
21440 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
21441 }
21442
21443 /* Return true if SEQ is a good candidate as a replacement for the
21444 if-convertible sequence described in IF_INFO. */
21445
21446 static bool
21447 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
21448 {
21449 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
21450 {
21451 int cmov_cnt = 0;
21452 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
21453 Maybe we should allow even more conditional moves as long as they
21454 are used far enough not to stall the CPU, or also consider
21455 IF_INFO->TEST_BB succ edge probabilities. */
21456 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
21457 {
21458 rtx set = single_set (insn);
21459 if (!set)
21460 continue;
21461 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
21462 continue;
21463 rtx src = SET_SRC (set);
21464 machine_mode mode = GET_MODE (src);
21465 if (GET_MODE_CLASS (mode) != MODE_INT
21466 && GET_MODE_CLASS (mode) != MODE_FLOAT)
21467 continue;
21468 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
21469 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
21470 continue;
21471 /* insn is CMOV or FCMOV. */
21472 if (++cmov_cnt > 1)
21473 return false;
21474 }
21475 }
21476 return default_noce_conversion_profitable_p (seq, if_info);
21477 }
21478
21479 /* Implement targetm.vectorize.init_cost. */
21480
21481 static void *
21482 ix86_init_cost (class loop *)
21483 {
21484 unsigned *cost = XNEWVEC (unsigned, 3);
21485 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
21486 return cost;
21487 }
21488
21489 /* Implement targetm.vectorize.add_stmt_cost. */
21490
21491 static unsigned
21492 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
21493 class _stmt_vec_info *stmt_info, int misalign,
21494 enum vect_cost_model_location where)
21495 {
21496 unsigned *cost = (unsigned *) data;
21497 unsigned retval = 0;
21498 bool scalar_p
21499 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
21500
21501 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
21502 int stmt_cost = -1;
21503
21504 bool fp = false;
21505 machine_mode mode = scalar_p ? SImode : TImode;
21506
21507 if (vectype != NULL)
21508 {
21509 fp = FLOAT_TYPE_P (vectype);
21510 mode = TYPE_MODE (vectype);
21511 if (scalar_p)
21512 mode = TYPE_MODE (TREE_TYPE (vectype));
21513 }
21514
21515 if ((kind == vector_stmt || kind == scalar_stmt)
21516 && stmt_info
21517 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
21518 {
21519 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
21520 /*machine_mode inner_mode = mode;
21521 if (VECTOR_MODE_P (mode))
21522 inner_mode = GET_MODE_INNER (mode);*/
21523
21524 switch (subcode)
21525 {
21526 case PLUS_EXPR:
21527 case POINTER_PLUS_EXPR:
21528 case MINUS_EXPR:
21529 if (kind == scalar_stmt)
21530 {
21531 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21532 stmt_cost = ix86_cost->addss;
21533 else if (X87_FLOAT_MODE_P (mode))
21534 stmt_cost = ix86_cost->fadd;
21535 else
21536 stmt_cost = ix86_cost->add;
21537 }
21538 else
21539 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
21540 : ix86_cost->sse_op);
21541 break;
21542
21543 case MULT_EXPR:
21544 case WIDEN_MULT_EXPR:
21545 case MULT_HIGHPART_EXPR:
21546 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
21547 break;
21548 case NEGATE_EXPR:
21549 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21550 stmt_cost = ix86_cost->sse_op;
21551 else if (X87_FLOAT_MODE_P (mode))
21552 stmt_cost = ix86_cost->fchs;
21553 else if (VECTOR_MODE_P (mode))
21554 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21555 else
21556 stmt_cost = ix86_cost->add;
21557 break;
21558 case TRUNC_DIV_EXPR:
21559 case CEIL_DIV_EXPR:
21560 case FLOOR_DIV_EXPR:
21561 case ROUND_DIV_EXPR:
21562 case TRUNC_MOD_EXPR:
21563 case CEIL_MOD_EXPR:
21564 case FLOOR_MOD_EXPR:
21565 case RDIV_EXPR:
21566 case ROUND_MOD_EXPR:
21567 case EXACT_DIV_EXPR:
21568 stmt_cost = ix86_division_cost (ix86_cost, mode);
21569 break;
21570
21571 case RSHIFT_EXPR:
21572 case LSHIFT_EXPR:
21573 case LROTATE_EXPR:
21574 case RROTATE_EXPR:
21575 {
21576 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
21577 stmt_cost = ix86_shift_rotate_cost
21578 (ix86_cost, mode,
21579 TREE_CODE (op2) == INTEGER_CST,
21580 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
21581 true, false, false, NULL, NULL);
21582 }
21583 break;
21584 case NOP_EXPR:
21585 /* Only sign-conversions are free. */
21586 if (tree_nop_conversion_p
21587 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
21588 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
21589 stmt_cost = 0;
21590 break;
21591
21592 case BIT_IOR_EXPR:
21593 case ABS_EXPR:
21594 case ABSU_EXPR:
21595 case MIN_EXPR:
21596 case MAX_EXPR:
21597 case BIT_XOR_EXPR:
21598 case BIT_AND_EXPR:
21599 case BIT_NOT_EXPR:
21600 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21601 stmt_cost = ix86_cost->sse_op;
21602 else if (VECTOR_MODE_P (mode))
21603 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21604 else
21605 stmt_cost = ix86_cost->add;
21606 break;
21607 default:
21608 break;
21609 }
21610 }
21611
21612 combined_fn cfn;
21613 if ((kind == vector_stmt || kind == scalar_stmt)
21614 && stmt_info
21615 && stmt_info->stmt
21616 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
21617 switch (cfn)
21618 {
21619 case CFN_FMA:
21620 stmt_cost = ix86_vec_cost (mode,
21621 mode == SFmode ? ix86_cost->fmass
21622 : ix86_cost->fmasd);
21623 break;
21624 default:
21625 break;
21626 }
21627
21628 /* If we do elementwise loads into a vector then we are bound by
21629 latency and execution resources for the many scalar loads
21630 (AGU and load ports). Try to account for this by scaling the
21631 construction cost by the number of elements involved. */
21632 if ((kind == vec_construct || kind == vec_to_scalar)
21633 && stmt_info
21634 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
21635 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
21636 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
21637 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
21638 {
21639 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21640 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
21641 }
21642 if (stmt_cost == -1)
21643 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21644
21645 /* Penalize DFmode vector operations for Bonnell. */
21646 if (TARGET_BONNELL && kind == vector_stmt
21647 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
21648 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
21649
21650 /* Statements in an inner loop relative to the loop being
21651 vectorized are weighted more heavily. The value here is
21652 arbitrary and could potentially be improved with analysis. */
21653 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
21654 count *= 50; /* FIXME. */
21655
21656 retval = (unsigned) (count * stmt_cost);
21657
21658 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
21659    for Silvermont, as it has an out-of-order integer pipeline and can
21660    execute 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
21661 if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
21662 || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
21663 {
21664 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
21665 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
21666 retval = (retval * 17) / 10;
21667 }
21668
21669 cost[where] += retval;
21670
21671 return retval;
21672 }
21673
21674 /* Implement targetm.vectorize.finish_cost. */
21675
21676 static void
21677 ix86_finish_cost (void *data, unsigned *prologue_cost,
21678 unsigned *body_cost, unsigned *epilogue_cost)
21679 {
21680 unsigned *cost = (unsigned *) data;
21681 *prologue_cost = cost[vect_prologue];
21682 *body_cost = cost[vect_body];
21683 *epilogue_cost = cost[vect_epilogue];
21684 }
21685
21686 /* Implement targetm.vectorize.destroy_cost_data. */
21687
21688 static void
21689 ix86_destroy_cost_data (void *data)
21690 {
21691 free (data);
21692 }
21693
21694 /* Validate target specific memory model bits in VAL. */
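/* The target-specific bits are the HLE prefixes, as in
     __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
   combining an HLE bit with a weaker memory model than it requires, or
   setting both HLE bits at once, is diagnosed below.  (Illustrative
   example; LOCK is a hypothetical variable.)  */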
21695
21696 static unsigned HOST_WIDE_INT
21697 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
21698 {
21699 enum memmodel model = memmodel_from_int (val);
21700 bool strong;
21701
21702 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
21703 |MEMMODEL_MASK)
21704 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
21705 {
21706 warning (OPT_Winvalid_memory_model,
21707 "unknown architecture specific memory model");
21708 return MEMMODEL_SEQ_CST;
21709 }
21710 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
21711 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
21712 {
21713 warning (OPT_Winvalid_memory_model,
21714 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
21715 "memory model");
21716 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
21717 }
21718 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
21719 {
21720 warning (OPT_Winvalid_memory_model,
21721 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
21722 "memory model");
21723 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
21724 }
21725 return val;
21726 }
21727
21728 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
21729 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
21730 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
21731 or the number of vecsize_mangle variants that should be emitted. */
21732
21733 static int
21734 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
21735 struct cgraph_simd_clone *clonei,
21736 tree base_type, int num)
21737 {
21738 int ret = 1;
21739
21740 if (clonei->simdlen
21741 && (clonei->simdlen < 2
21742 || clonei->simdlen > 1024
21743 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
21744 {
21745 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21746 "unsupported simdlen %d", clonei->simdlen);
21747 return 0;
21748 }
21749
21750 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
21751 if (TREE_CODE (ret_type) != VOID_TYPE)
21752 switch (TYPE_MODE (ret_type))
21753 {
21754 case E_QImode:
21755 case E_HImode:
21756 case E_SImode:
21757 case E_DImode:
21758 case E_SFmode:
21759 case E_DFmode:
21760 /* case E_SCmode: */
21761 /* case E_DCmode: */
21762 if (!AGGREGATE_TYPE_P (ret_type))
21763 break;
21764 /* FALLTHRU */
21765 default:
21766 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21767 "unsupported return type %qT for simd", ret_type);
21768 return 0;
21769 }
21770
21771 tree t;
21772 int i;
21773 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
21774 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
21775
21776 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
21777 t && t != void_list_node; t = TREE_CHAIN (t), i++)
21778 {
21779 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
21780 switch (TYPE_MODE (arg_type))
21781 {
21782 case E_QImode:
21783 case E_HImode:
21784 case E_SImode:
21785 case E_DImode:
21786 case E_SFmode:
21787 case E_DFmode:
21788 /* case E_SCmode: */
21789 /* case E_DCmode: */
21790 if (!AGGREGATE_TYPE_P (arg_type))
21791 break;
21792 /* FALLTHRU */
21793 default:
21794 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
21795 break;
21796 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21797 "unsupported argument type %qT for simd", arg_type);
21798 return 0;
21799 }
21800 }
21801
21802 if (!TREE_PUBLIC (node->decl))
21803 {
21804 /* If the function isn't exported, we can pick just one ISA
21805    for the clones. */
21806 if (TARGET_AVX512F)
21807 clonei->vecsize_mangle = 'e';
21808 else if (TARGET_AVX2)
21809 clonei->vecsize_mangle = 'd';
21810 else if (TARGET_AVX)
21811 clonei->vecsize_mangle = 'c';
21812 else
21813 clonei->vecsize_mangle = 'b';
21814 ret = 1;
21815 }
21816 else
21817 {
21818 clonei->vecsize_mangle = "bcde"[num];
21819 ret = 4;
21820 }
21821 clonei->mask_mode = VOIDmode;
21822 switch (clonei->vecsize_mangle)
21823 {
21824 case 'b':
21825 clonei->vecsize_int = 128;
21826 clonei->vecsize_float = 128;
21827 break;
21828 case 'c':
21829 clonei->vecsize_int = 128;
21830 clonei->vecsize_float = 256;
21831 break;
21832 case 'd':
21833 clonei->vecsize_int = 256;
21834 clonei->vecsize_float = 256;
21835 break;
21836 case 'e':
21837 clonei->vecsize_int = 512;
21838 clonei->vecsize_float = 512;
21839 if (TYPE_MODE (base_type) == QImode)
21840 clonei->mask_mode = DImode;
21841 else
21842 clonei->mask_mode = SImode;
21843 break;
21844 }
21845 if (clonei->simdlen == 0)
21846 {
21847 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
21848 clonei->simdlen = clonei->vecsize_int;
21849 else
21850 clonei->simdlen = clonei->vecsize_float;
21851 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
21852 }
21853 else if (clonei->simdlen > 16)
21854 {
21855 /* For compatibility with ICC, use the same upper bounds
21856    for simdlen. In particular, for CTYPE below, use the return type,
21857    unless the function returns void, in which case use the characteristic
21858    type. If it is possible for the given SIMDLEN to pass a CTYPE value
21859    in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
21860    for 64-bit code), accept that SIMDLEN, otherwise warn and don't
21861    emit the corresponding clone. */
21862 tree ctype = ret_type;
21863 if (TREE_CODE (ret_type) == VOID_TYPE)
21864 ctype = base_type;
21865 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
21866 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
21867 cnt /= clonei->vecsize_int;
21868 else
21869 cnt /= clonei->vecsize_float;
21870 if (cnt > (TARGET_64BIT ? 16 : 8))
21871 {
21872 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21873 "unsupported simdlen %d", clonei->simdlen);
21874 return 0;
21875 }
21876 }
21877 return ret;
21878 }
21879
21880 /* If SIMD clone NODE can't be used in a vectorized loop
21881    in the current function, return -1; otherwise return the badness of using
21882    it (0 if it is most desirable from the vecsize_mangle point of view, 1
21883    slightly less desirable, etc.). */
21884
21885 static int
21886 ix86_simd_clone_usable (struct cgraph_node *node)
21887 {
21888 switch (node->simdclone->vecsize_mangle)
21889 {
21890 case 'b':
21891 if (!TARGET_SSE2)
21892 return -1;
21893 if (!TARGET_AVX)
21894 return 0;
21895 return TARGET_AVX2 ? 2 : 1;
21896 case 'c':
21897 if (!TARGET_AVX)
21898 return -1;
21899 return TARGET_AVX2 ? 1 : 0;
21900 case 'd':
21901 if (!TARGET_AVX2)
21902 return -1;
21903 return 0;
21904 case 'e':
21905 if (!TARGET_AVX512F)
21906 return -1;
21907 return 0;
21908 default:
21909 gcc_unreachable ();
21910 }
21911 }
21912
21913 /* This function adjusts the unroll factor based on
21914    the hardware capabilities. For example, bdver3 has
21915    a loop buffer which makes unrolling of smaller
21916    loops less important. This function decides the
21917    unroll factor using the number of memory references
21918    (a budget of 32 is used) as a heuristic. */
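/* For instance, a loop body with eight word-sized memory references
   has its unroll factor capped at 32 / 8 = 4 on these targets;
   references wider than four words count twice.  */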
21919
21920 static unsigned
21921 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
21922 {
21923 basic_block *bbs;
21924 rtx_insn *insn;
21925 unsigned i;
21926 unsigned mem_count = 0;
21927
21928 if (!TARGET_ADJUST_UNROLL)
21929 return nunroll;
21930
21931 /* Count the number of memory references within the loop body.
21932 This value determines the unrolling factor for bdver3 and bdver4
21933 architectures. */
21934 subrtx_iterator::array_type array;
21935 bbs = get_loop_body (loop);
21936 for (i = 0; i < loop->num_nodes; i++)
21937 FOR_BB_INSNS (bbs[i], insn)
21938 if (NONDEBUG_INSN_P (insn))
21939 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
21940 if (const_rtx x = *iter)
21941 if (MEM_P (x))
21942 {
21943 machine_mode mode = GET_MODE (x);
21944 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21945 if (n_words > 4)
21946 mem_count += 2;
21947 else
21948 mem_count += 1;
21949 }
21950 free (bbs);
21951
21952 if (mem_count && mem_count <= 32)
21953 return MIN (nunroll, 32 / mem_count);
21954
21955 return nunroll;
21956 }
21957
21958
21959 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
21960
21961 static bool
21962 ix86_float_exceptions_rounding_supported_p (void)
21963 {
21964 /* For x87 floating point with standard excess precision handling,
21965 there is no adddf3 pattern (since x87 floating point only has
21966 XFmode operations) so the default hook implementation gets this
21967 wrong. */
21968 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
21969 }
21970
21971 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
21972
21973 static void
21974 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
21975 {
21976 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
21977 return;
21978 tree exceptions_var = create_tmp_var_raw (integer_type_node);
21979 if (TARGET_80387)
21980 {
21981 tree fenv_index_type = build_index_type (size_int (6));
21982 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
21983 tree fenv_var = create_tmp_var_raw (fenv_type);
21984 TREE_ADDRESSABLE (fenv_var) = 1;
21985 tree fenv_ptr = build_pointer_type (fenv_type);
21986 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
21987 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
21988 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
21989 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
21990 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
21991 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
21992 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
21993 tree hold_fnclex = build_call_expr (fnclex, 0);
21994 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
21995 NULL_TREE, NULL_TREE);
21996 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
21997 hold_fnclex);
21998 *clear = build_call_expr (fnclex, 0);
21999 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
22000 tree fnstsw_call = build_call_expr (fnstsw, 0);
22001 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
22002 sw_var, fnstsw_call);
22003 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
22004 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
22005 exceptions_var, exceptions_x87);
22006 *update = build2 (COMPOUND_EXPR, integer_type_node,
22007 sw_mod, update_mod);
22008 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
22009 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
22010 }
22011 if (TARGET_SSE && TARGET_SSE_MATH)
22012 {
22013 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
22014 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
22015 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
22016 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
22017 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
22018 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
22019 mxcsr_orig_var, stmxcsr_hold_call);
22020 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
22021 mxcsr_orig_var,
22022 build_int_cst (unsigned_type_node, 0x1f80));
22023 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
22024 build_int_cst (unsigned_type_node, 0xffffffc0));
22025 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
22026 mxcsr_mod_var, hold_mod_val);
22027 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22028 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
22029 hold_assign_orig, hold_assign_mod);
22030 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
22031 ldmxcsr_hold_call);
22032 if (*hold)
22033 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
22034 else
22035 *hold = hold_all;
22036 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22037 if (*clear)
22038 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
22039 ldmxcsr_clear_call);
22040 else
22041 *clear = ldmxcsr_clear_call;
22042 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
22043 tree exceptions_sse = fold_convert (integer_type_node,
22044 stxmcsr_update_call);
22045 if (*update)
22046 {
22047 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
22048 exceptions_var, exceptions_sse);
22049 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
22050 exceptions_var, exceptions_mod);
22051 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
22052 exceptions_assign);
22053 }
22054 else
22055 *update = build2 (MODIFY_EXPR, integer_type_node,
22056 exceptions_var, exceptions_sse);
22057 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
22058 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22059 ldmxcsr_update_call);
22060 }
22061 tree atomic_feraiseexcept
22062 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
22063 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
22064 1, exceptions_var);
22065 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22066 atomic_feraiseexcept_call);
22067 }
22068
22069 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22070 /* For i386, a common symbol is local only for non-PIE binaries. For
22071    x86-64, a common symbol is local only for non-PIE binaries or if the
22072    linker supports copy relocations in PIE binaries. */
22073
22074 static bool
22075 ix86_binds_local_p (const_tree exp)
22076 {
22077 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
22078 (!flag_pic
22079 || (TARGET_64BIT
22080 && HAVE_LD_PIE_COPYRELOC != 0)));
22081 }
22082 #endif
22083
22084 /* If MEM is in the form of [base+offset], extract the two parts of the
22085    address into BASE and OFFSET and return true; otherwise return false. */
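/* For example, a MEM whose address is (plus (reg) (const_int 16))
   yields BASE = the register and OFFSET = 16; a bare register or
   symbol address is treated as having offset 0.  */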
22086
22087 static bool
22088 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
22089 {
22090 rtx addr;
22091
22092 gcc_assert (MEM_P (mem));
22093
22094 addr = XEXP (mem, 0);
22095
22096 if (GET_CODE (addr) == CONST)
22097 addr = XEXP (addr, 0);
22098
22099 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
22100 {
22101 *base = addr;
22102 *offset = const0_rtx;
22103 return true;
22104 }
22105
22106 if (GET_CODE (addr) == PLUS
22107 && (REG_P (XEXP (addr, 0))
22108 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
22109 && CONST_INT_P (XEXP (addr, 1)))
22110 {
22111 *base = XEXP (addr, 0);
22112 *offset = XEXP (addr, 1);
22113 return true;
22114 }
22115
22116 return false;
22117 }
22118
22119 /* Given OPERANDS of consecutive load/store instructions, check if we can
22120    merge them into a move-multiple. LOAD is true if they are load
22121    instructions. MODE is the mode of the memory operands. */
22122
22123 bool
22124 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
22125 machine_mode mode)
22126 {
22127 HOST_WIDE_INT offval_1, offval_2, msize;
22128 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22129
22130 if (load)
22131 {
22132 mem_1 = operands[1];
22133 mem_2 = operands[3];
22134 reg_1 = operands[0];
22135 reg_2 = operands[2];
22136 }
22137 else
22138 {
22139 mem_1 = operands[0];
22140 mem_2 = operands[2];
22141 reg_1 = operands[1];
22142 reg_2 = operands[3];
22143 }
22144
22145 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22146
22147 if (REGNO (reg_1) != REGNO (reg_2))
22148 return false;
22149
22150 /* Check if the addresses are in the form of [base+offset]. */
22151 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
22152 return false;
22153 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
22154 return false;
22155
22156 /* Check if the bases are the same. */
22157 if (!rtx_equal_p (base_1, base_2))
22158 return false;
22159
22160 offval_1 = INTVAL (offset_1);
22161 offval_2 = INTVAL (offset_2);
22162 msize = GET_MODE_SIZE (mode);
22163 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22164 if (offval_1 + msize != offval_2)
22165 return false;
22166
22167 return true;
22168 }
22169
22170 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22171
22172 static bool
22173 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
22174 optimization_type opt_type)
22175 {
22176 switch (op)
22177 {
22178 case asin_optab:
22179 case acos_optab:
22180 case log1p_optab:
22181 case exp_optab:
22182 case exp10_optab:
22183 case exp2_optab:
22184 case expm1_optab:
22185 case ldexp_optab:
22186 case scalb_optab:
22187 case round_optab:
22188 return opt_type == OPTIMIZE_FOR_SPEED;
22189
22190 case rint_optab:
22191 if (SSE_FLOAT_MODE_P (mode1)
22192 && TARGET_SSE_MATH
22193 && !flag_trapping_math
22194 && !TARGET_SSE4_1)
22195 return opt_type == OPTIMIZE_FOR_SPEED;
22196 return true;
22197
22198 case floor_optab:
22199 case ceil_optab:
22200 case btrunc_optab:
22201 if (SSE_FLOAT_MODE_P (mode1)
22202 && TARGET_SSE_MATH
22203 && !flag_trapping_math
22204 && TARGET_SSE4_1)
22205 return true;
22206 return opt_type == OPTIMIZE_FOR_SPEED;
22207
22208 case rsqrt_optab:
22209 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
22210
22211 default:
22212 return true;
22213 }
22214 }
22215
22216 /* Address space support.
22217
22218 This is not "far pointers" in the 16-bit sense, but an easy way
22219 to use %fs and %gs segment prefixes. Therefore:
22220
22221 (a) All address spaces have the same modes,
22222 (b) All address spaces have the same address forms,
22223 (c) While %fs and %gs are technically subsets of the generic
22224 address space, they are probably not subsets of each other.
22225 (d) Since we have no access to the segment base register values
22226 without resorting to a system call, we cannot convert a
22227 non-default address space to a default address space.
22228 Therefore we do not claim %fs or %gs are subsets of generic.
22229
22230 Therefore we can (mostly) use the default hooks. */
22231
22232 /* All use of segmentation is assumed to make address 0 valid. */
22233
22234 static bool
22235 ix86_addr_space_zero_address_valid (addr_space_t as)
22236 {
22237 return as != ADDR_SPACE_GENERIC;
22238 }
22239
22240 static void
22241 ix86_init_libfuncs (void)
22242 {
22243 if (TARGET_64BIT)
22244 {
22245 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
22246 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
22247 }
22248 else
22249 {
22250 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
22251 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
22252 }
22253
22254 #if TARGET_MACHO
22255 darwin_rename_builtins ();
22256 #endif
22257 }
22258
22259 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
22260 FPU, assume that the fpcw is set to extended precision; when using
22261 only SSE, rounding is correct; when using both SSE and the FPU,
22262 the rounding precision is indeterminate, since either may be chosen
22263 apparently at random. */
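/* Roughly: x87-only configurations promote to long double
   (FLT_EVAL_METHOD == 2), SSE2-only math evaluates in the nominal type
   (FLT_EVAL_METHOD == 0), and mixed x87/SSE math is reported as
   unpredictable when implicit excess precision is in effect.  */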
22264
22265 static enum flt_eval_method
22266 ix86_excess_precision (enum excess_precision_type type)
22267 {
22268 switch (type)
22269 {
22270 case EXCESS_PRECISION_TYPE_FAST:
22271 /* The fastest type to promote to will always be the native type,
22272 whether that occurs with implicit excess precision or
22273 otherwise. */
22274 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22275 case EXCESS_PRECISION_TYPE_STANDARD:
22276 case EXCESS_PRECISION_TYPE_IMPLICIT:
22277 /* Otherwise, the excess precision we want when we are
22278 in a standards compliant mode, and the implicit precision we
22279 provide would be identical were it not for the unpredictable
22280 cases. */
22281 if (!TARGET_80387)
22282 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22283 else if (!TARGET_MIX_SSE_I387)
22284 {
22285 if (!(TARGET_SSE && TARGET_SSE_MATH))
22286 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
22287 else if (TARGET_SSE2)
22288 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22289 }
22290
22291 /* If we are in standards compliant mode, but we know we will
22292 calculate in unpredictable precision, return
22293 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
22294 excess precision if the target can't guarantee it will honor
22295 it. */
22296 return (type == EXCESS_PRECISION_TYPE_STANDARD
22297 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
22298 : FLT_EVAL_METHOD_UNPREDICTABLE);
22299 default:
22300 gcc_unreachable ();
22301 }
22302
22303 return FLT_EVAL_METHOD_UNPREDICTABLE;
22304 }
22305
22306 /* Implement PUSH_ROUNDING. On the 386, we have a pushw instruction that
22307    decrements by exactly 2 no matter what the position was; there is no pushb.
22308
22309    But as the CIE data alignment factor on this arch is -4 for 32-bit targets
22310    and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
22311    are multiples of 4 for 32-bit targets and 8 for 64-bit targets. */
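/* For example, pushing a 2-byte operand reserves 4 bytes of stack on
   32-bit targets and 8 bytes on 64-bit targets.  */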
22312
22313 poly_int64
22314 ix86_push_rounding (poly_int64 bytes)
22315 {
22316 return ROUND_UP (bytes, UNITS_PER_WORD);
22317 }
22318
22319 /* Target-specific selftests. */
22320
22321 #if CHECKING_P
22322
22323 namespace selftest {
22324
22325 /* Verify that hard regs are dumped as expected (in compact mode). */
22326
22327 static void
22328 ix86_test_dumping_hard_regs ()
22329 {
22330 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
22331 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
22332 }
22333
22334 /* Test dumping an insn with repeated references to the same SCRATCH,
22335 to verify the rtx_reuse code. */
22336
22337 static void
22338 ix86_test_dumping_memory_blockage ()
22339 {
22340 set_new_first_and_last_insn (NULL, NULL);
22341
22342 rtx pat = gen_memory_blockage ();
22343 rtx_reuse_manager r;
22344 r.preprocess (pat);
22345
22346 /* Verify that the repeated references to the SCRATCH are shown using
22347    reuse IDs. The first should be prefixed with a reuse ID,
22348 and the second should be dumped as a "reuse_rtx" of that ID.
22349 The expected string assumes Pmode == DImode. */
22350 if (Pmode == DImode)
22351 ASSERT_RTL_DUMP_EQ_WITH_REUSE
22352 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
22353 " (unspec:BLK [\n"
22354 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
22355 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
22356 }
22357
22358 /* Verify loading an RTL dump; specifically a dump of copying
22359 a param on x86_64 from a hard reg into the frame.
22360 This test is target-specific since the dump contains target-specific
22361 hard reg names. */
22362
22363 static void
22364 ix86_test_loading_dump_fragment_1 ()
22365 {
22366 rtl_dump_test t (SELFTEST_LOCATION,
22367 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
22368
22369 rtx_insn *insn = get_insn_by_uid (1);
22370
22371 /* The block structure and indentation here are purely for
22372 readability; they mirror the structure of the rtx. */
22373 tree mem_expr;
22374 {
22375 rtx pat = PATTERN (insn);
22376 ASSERT_EQ (SET, GET_CODE (pat));
22377 {
22378 rtx dest = SET_DEST (pat);
22379 ASSERT_EQ (MEM, GET_CODE (dest));
22380 /* Verify the "/c" was parsed. */
22381 ASSERT_TRUE (RTX_FLAG (dest, call));
22382 ASSERT_EQ (SImode, GET_MODE (dest));
22383 {
22384 rtx addr = XEXP (dest, 0);
22385 ASSERT_EQ (PLUS, GET_CODE (addr));
22386 ASSERT_EQ (DImode, GET_MODE (addr));
22387 {
22388 rtx lhs = XEXP (addr, 0);
22389 /* Verify that the "frame" REG was consolidated. */
22390 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
22391 }
22392 {
22393 rtx rhs = XEXP (addr, 1);
22394 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
22395 ASSERT_EQ (-4, INTVAL (rhs));
22396 }
22397 }
22398 /* Verify the "[1 i+0 S4 A32]" was parsed. */
22399 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
22400 /* "i" should have been handled by synthesizing a global int
22401 variable named "i". */
22402 mem_expr = MEM_EXPR (dest);
22403 ASSERT_NE (mem_expr, NULL);
22404 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
22405 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
22406 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
22407 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
22408 /* "+0". */
22409 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
22410 ASSERT_EQ (0, MEM_OFFSET (dest));
22411 /* "S4". */
22412 ASSERT_EQ (4, MEM_SIZE (dest));
22413 /* "A32. */
22414 ASSERT_EQ (32, MEM_ALIGN (dest));
22415 }
22416 {
22417 rtx src = SET_SRC (pat);
22418 ASSERT_EQ (REG, GET_CODE (src));
22419 ASSERT_EQ (SImode, GET_MODE (src));
22420 ASSERT_EQ (5, REGNO (src));
22421 tree reg_expr = REG_EXPR (src);
22422 /* "i" here should point to the same var as for the MEM_EXPR. */
22423 ASSERT_EQ (reg_expr, mem_expr);
22424 }
22425 }
22426 }
22427
22428 /* Verify that the RTL loader copes with a call_insn dump.
22429 This test is target-specific since the dump contains a target-specific
22430 hard reg name. */
22431
22432 static void
22433 ix86_test_loading_call_insn ()
22434 {
22435 /* The test dump includes register "xmm0", which requires TARGET_SSE
22436 to exist. */
22437 if (!TARGET_SSE)
22438 return;
22439
22440 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
22441
22442 rtx_insn *insn = get_insns ();
22443 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
22444
22445 /* "/j". */
22446 ASSERT_TRUE (RTX_FLAG (insn, jump));
22447
22448 rtx pat = PATTERN (insn);
22449 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
22450
22451 /* Verify REG_NOTES. */
22452 {
22453 /* "(expr_list:REG_CALL_DECL". */
22454 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
22455 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
22456 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
22457
22458 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
22459 rtx_expr_list *note1 = note0->next ();
22460 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
22461
22462 ASSERT_EQ (NULL, note1->next ());
22463 }
22464
22465 /* Verify CALL_INSN_FUNCTION_USAGE. */
22466 {
22467 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
22468 rtx_expr_list *usage
22469 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
22470 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
22471 ASSERT_EQ (DFmode, GET_MODE (usage));
22472 ASSERT_EQ (USE, GET_CODE (usage->element ()));
22473 ASSERT_EQ (NULL, usage->next ());
22474 }
22475 }
22476
22477 /* Verify that the RTL loader copes with a dump from print_rtx_function.
22478 This test is target-specific since the dump contains target-specific
22479 hard reg names. */
22480
22481 static void
22482 ix86_test_loading_full_dump ()
22483 {
22484 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
22485
22486 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22487
22488 rtx_insn *insn_1 = get_insn_by_uid (1);
22489 ASSERT_EQ (NOTE, GET_CODE (insn_1));
22490
22491 rtx_insn *insn_7 = get_insn_by_uid (7);
22492 ASSERT_EQ (INSN, GET_CODE (insn_7));
22493 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
22494
22495 rtx_insn *insn_15 = get_insn_by_uid (15);
22496 ASSERT_EQ (INSN, GET_CODE (insn_15));
22497 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
22498
22499 /* Verify crtl->return_rtx. */
22500 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
22501 ASSERT_EQ (0, REGNO (crtl->return_rtx));
22502 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
22503 }
22504
22505 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
22506 In particular, verify that it correctly loads the 2nd operand.
22507 This test is target-specific since these are machine-specific
22508 operands (and enums). */
22509
22510 static void
22511 ix86_test_loading_unspec ()
22512 {
22513 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
22514
22515 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22516
22517 ASSERT_TRUE (cfun);
22518
22519 /* Test of an UNSPEC. */
22520 rtx_insn *insn = get_insns ();
22521 ASSERT_EQ (INSN, GET_CODE (insn));
22522 rtx set = single_set (insn);
22523 ASSERT_NE (NULL, set);
22524 rtx dst = SET_DEST (set);
22525 ASSERT_EQ (MEM, GET_CODE (dst));
22526 rtx src = SET_SRC (set);
22527 ASSERT_EQ (UNSPEC, GET_CODE (src));
22528 ASSERT_EQ (BLKmode, GET_MODE (src));
22529 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
22530
22531 rtx v0 = XVECEXP (src, 0, 0);
22532
22533 /* Verify that the two uses of the first SCRATCH have pointer
22534 equality. */
22535 rtx scratch_a = XEXP (dst, 0);
22536 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
22537
22538 rtx scratch_b = XEXP (v0, 0);
22539 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
22540
22541 ASSERT_EQ (scratch_a, scratch_b);
22542
22543 /* Verify that the two mems are thus treated as equal. */
22544 ASSERT_TRUE (rtx_equal_p (dst, v0));
22545
22546 /* Verify that the insn is recognized. */
22547 ASSERT_NE (-1, recog_memoized (insn));
22548
22549 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
22550 insn = NEXT_INSN (insn);
22551 ASSERT_EQ (INSN, GET_CODE (insn));
22552
22553 set = single_set (insn);
22554 ASSERT_NE (NULL, set);
22555
22556 src = SET_SRC (set);
22557 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
22558 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
22559 }
22560
22561 /* Run all target-specific selftests. */
22562
22563 static void
22564 ix86_run_selftests (void)
22565 {
22566 ix86_test_dumping_hard_regs ();
22567 ix86_test_dumping_memory_blockage ();
22568
22569 /* Various tests of loading RTL dumps, here because they contain
22570 ix86-isms (e.g. names of hard regs). */
22571 ix86_test_loading_dump_fragment_1 ();
22572 ix86_test_loading_call_insn ();
22573 ix86_test_loading_full_dump ();
22574 ix86_test_loading_unspec ();
22575 }
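
/* Editor's note: these routines are wired up through the
   TARGET_RUN_TARGET_SELFTESTS hook defined near the end of this file and
   are exercised by the compiler's self-test machinery (e.g. via
   "make selftest" in a build tree), assuming a checking-enabled build.  */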
22576
22577 } // namespace selftest
22578
22579 #endif /* CHECKING_P */
22580
22581 /* Initialize the GCC target structure. */
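/* Editor's note: every hook below follows the same idiom -- #undef the
   default supplied by target-def.h, then #define the macro to the
   i386-specific implementation; TARGET_INITIALIZER at the bottom of the
   file collects all of these into the targetm structure.  */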
22582 #undef TARGET_RETURN_IN_MEMORY
22583 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
22584
22585 #undef TARGET_LEGITIMIZE_ADDRESS
22586 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
22587
22588 #undef TARGET_ATTRIBUTE_TABLE
22589 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22590 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
22591 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
22592 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22593 # undef TARGET_MERGE_DECL_ATTRIBUTES
22594 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22595 #endif
22596
22597 #undef TARGET_COMP_TYPE_ATTRIBUTES
22598 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22599
22600 #undef TARGET_INIT_BUILTINS
22601 #define TARGET_INIT_BUILTINS ix86_init_builtins
22602 #undef TARGET_BUILTIN_DECL
22603 #define TARGET_BUILTIN_DECL ix86_builtin_decl
22604 #undef TARGET_EXPAND_BUILTIN
22605 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22606
22607 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22608 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22609 ix86_builtin_vectorized_function
22610
22611 #undef TARGET_VECTORIZE_BUILTIN_GATHER
22612 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
22613
22614 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
22615 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
22616
22617 #undef TARGET_BUILTIN_RECIPROCAL
22618 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
22619
22620 #undef TARGET_ASM_FUNCTION_EPILOGUE
22621 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22622
22623 #undef TARGET_ENCODE_SECTION_INFO
22624 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22625 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22626 #else
22627 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22628 #endif
22629
22630 #undef TARGET_ASM_OPEN_PAREN
22631 #define TARGET_ASM_OPEN_PAREN ""
22632 #undef TARGET_ASM_CLOSE_PAREN
22633 #define TARGET_ASM_CLOSE_PAREN ""
22634
22635 #undef TARGET_ASM_BYTE_OP
22636 #define TARGET_ASM_BYTE_OP ASM_BYTE
22637
22638 #undef TARGET_ASM_ALIGNED_HI_OP
22639 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22640 #undef TARGET_ASM_ALIGNED_SI_OP
22641 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22642 #ifdef ASM_QUAD
22643 #undef TARGET_ASM_ALIGNED_DI_OP
22644 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22645 #endif
22646
22647 #undef TARGET_PROFILE_BEFORE_PROLOGUE
22648 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
22649
22650 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
22651 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
22652
22653 #undef TARGET_ASM_UNALIGNED_HI_OP
22654 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22655 #undef TARGET_ASM_UNALIGNED_SI_OP
22656 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22657 #undef TARGET_ASM_UNALIGNED_DI_OP
22658 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22659
22660 #undef TARGET_PRINT_OPERAND
22661 #define TARGET_PRINT_OPERAND ix86_print_operand
22662 #undef TARGET_PRINT_OPERAND_ADDRESS
22663 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
22664 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
22665 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
22666 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
22667 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
22668
22669 #undef TARGET_SCHED_INIT_GLOBAL
22670 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
22671 #undef TARGET_SCHED_ADJUST_COST
22672 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22673 #undef TARGET_SCHED_ISSUE_RATE
22674 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22675 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22676 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22677 ia32_multipass_dfa_lookahead
22678 #undef TARGET_SCHED_MACRO_FUSION_P
22679 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
22680 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
22681 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
22682
22683 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22684 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22685
22686 #undef TARGET_MEMMODEL_CHECK
22687 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
22688
22689 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
22690 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
22691
22692 #ifdef HAVE_AS_TLS
22693 #undef TARGET_HAVE_TLS
22694 #define TARGET_HAVE_TLS true
22695 #endif
22696 #undef TARGET_CANNOT_FORCE_CONST_MEM
22697 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22698 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22699 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
22700
22701 #undef TARGET_DELEGITIMIZE_ADDRESS
22702 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22703
22704 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
22705 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
22706
22707 #undef TARGET_MS_BITFIELD_LAYOUT_P
22708 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22709
22710 #if TARGET_MACHO
22711 #undef TARGET_BINDS_LOCAL_P
22712 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22713 #else
22714 #undef TARGET_BINDS_LOCAL_P
22715 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
22716 #endif
22717 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22718 #undef TARGET_BINDS_LOCAL_P
22719 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22720 #endif
22721
22722 #undef TARGET_ASM_OUTPUT_MI_THUNK
22723 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22724 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22725 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22726
22727 #undef TARGET_ASM_FILE_START
22728 #define TARGET_ASM_FILE_START x86_file_start
22729
22730 #undef TARGET_OPTION_OVERRIDE
22731 #define TARGET_OPTION_OVERRIDE ix86_option_override
22732
22733 #undef TARGET_REGISTER_MOVE_COST
22734 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
22735 #undef TARGET_MEMORY_MOVE_COST
22736 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
22737 #undef TARGET_RTX_COSTS
22738 #define TARGET_RTX_COSTS ix86_rtx_costs
22739 #undef TARGET_ADDRESS_COST
22740 #define TARGET_ADDRESS_COST ix86_address_cost
22741
22742 #undef TARGET_FLAGS_REGNUM
22743 #define TARGET_FLAGS_REGNUM FLAGS_REG
22744 #undef TARGET_FIXED_CONDITION_CODE_REGS
22745 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22746 #undef TARGET_CC_MODES_COMPATIBLE
22747 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22748
22749 #undef TARGET_MACHINE_DEPENDENT_REORG
22750 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22751
22752 #undef TARGET_BUILD_BUILTIN_VA_LIST
22753 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22754
22755 #undef TARGET_FOLD_BUILTIN
22756 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
22757
22758 #undef TARGET_GIMPLE_FOLD_BUILTIN
22759 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
22760
22761 #undef TARGET_COMPARE_VERSION_PRIORITY
22762 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
22763
22764 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
22765 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
22766 ix86_generate_version_dispatcher_body
22767
22768 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
22769 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
22770 ix86_get_function_versions_dispatcher
22771
22772 #undef TARGET_ENUM_VA_LIST_P
22773 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
22774
22775 #undef TARGET_FN_ABI_VA_LIST
22776 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
22777
22778 #undef TARGET_CANONICAL_VA_LIST_TYPE
22779 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
22780
22781 #undef TARGET_EXPAND_BUILTIN_VA_START
22782 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
22783
22784 #undef TARGET_MD_ASM_ADJUST
22785 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
22786
22787 #undef TARGET_C_EXCESS_PRECISION
22788 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
22789 #undef TARGET_PROMOTE_PROTOTYPES
22790 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
22791 #undef TARGET_SETUP_INCOMING_VARARGS
22792 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22793 #undef TARGET_MUST_PASS_IN_STACK
22794 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22795 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
22796 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
22797 #undef TARGET_FUNCTION_ARG_ADVANCE
22798 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
22799 #undef TARGET_FUNCTION_ARG
22800 #define TARGET_FUNCTION_ARG ix86_function_arg
22801 #undef TARGET_INIT_PIC_REG
22802 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
22803 #undef TARGET_USE_PSEUDO_PIC_REG
22804 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
22805 #undef TARGET_FUNCTION_ARG_BOUNDARY
22806 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
22807 #undef TARGET_PASS_BY_REFERENCE
22808 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22809 #undef TARGET_INTERNAL_ARG_POINTER
22810 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22811 #undef TARGET_UPDATE_STACK_BOUNDARY
22812 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
22813 #undef TARGET_GET_DRAP_RTX
22814 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
22815 #undef TARGET_STRICT_ARGUMENT_NAMING
22816 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22817 #undef TARGET_STATIC_CHAIN
22818 #define TARGET_STATIC_CHAIN ix86_static_chain
22819 #undef TARGET_TRAMPOLINE_INIT
22820 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
22821 #undef TARGET_RETURN_POPS_ARGS
22822 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
22823
22824 #undef TARGET_WARN_FUNC_RETURN
22825 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
22826
22827 #undef TARGET_LEGITIMATE_COMBINED_INSN
22828 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
22829
22830 #undef TARGET_ASAN_SHADOW_OFFSET
22831 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
22832
22833 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22834 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22835
22836 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22837 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22838
22839 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22840 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22841
22842 #undef TARGET_C_MODE_FOR_SUFFIX
22843 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
22844
22845 #ifdef HAVE_AS_TLS
22846 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22847 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22848 #endif
22849
22850 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22851 #undef TARGET_INSERT_ATTRIBUTES
22852 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22853 #endif
22854
22855 #undef TARGET_MANGLE_TYPE
22856 #define TARGET_MANGLE_TYPE ix86_mangle_type
22857
22858 #undef TARGET_STACK_PROTECT_GUARD
22859 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
22860
22861 #if !TARGET_MACHO
22862 #undef TARGET_STACK_PROTECT_FAIL
22863 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22864 #endif
22865
22866 #undef TARGET_FUNCTION_VALUE
22867 #define TARGET_FUNCTION_VALUE ix86_function_value
22868
22869 #undef TARGET_FUNCTION_VALUE_REGNO_P
22870 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
22871
22872 #undef TARGET_PROMOTE_FUNCTION_MODE
22873 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
22874
22875 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
22876 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
22877
22878 #undef TARGET_MEMBER_TYPE_FORCES_BLK
22879 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
22880
22881 #undef TARGET_INSTANTIATE_DECLS
22882 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
22883
22884 #undef TARGET_SECONDARY_RELOAD
22885 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
22886 #undef TARGET_SECONDARY_MEMORY_NEEDED
22887 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
22888 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
22889 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
22890
22891 #undef TARGET_CLASS_MAX_NREGS
22892 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
22893
22894 #undef TARGET_PREFERRED_RELOAD_CLASS
22895 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
22896 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
22897 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
22898 #undef TARGET_CLASS_LIKELY_SPILLED_P
22899 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
22900
22901 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
22902 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
22903 ix86_builtin_vectorization_cost
22904 #undef TARGET_VECTORIZE_VEC_PERM_CONST
22905 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
22906 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
22907 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
22908 ix86_preferred_simd_mode
22909 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
22910 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
22911 ix86_split_reduction
22912 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
22913 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
22914 ix86_autovectorize_vector_sizes
22915 #undef TARGET_VECTORIZE_GET_MASK_MODE
22916 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
22917 #undef TARGET_VECTORIZE_INIT_COST
22918 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
22919 #undef TARGET_VECTORIZE_ADD_STMT_COST
22920 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
22921 #undef TARGET_VECTORIZE_FINISH_COST
22922 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
22923 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
22924 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
22925
22926 #undef TARGET_SET_CURRENT_FUNCTION
22927 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
22928
22929 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
22930 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
22931
22932 #undef TARGET_OPTION_SAVE
22933 #define TARGET_OPTION_SAVE ix86_function_specific_save
22934
22935 #undef TARGET_OPTION_RESTORE
22936 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
22937
22938 #undef TARGET_OPTION_POST_STREAM_IN
22939 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
22940
22941 #undef TARGET_OPTION_PRINT
22942 #define TARGET_OPTION_PRINT ix86_function_specific_print
22943
22944 #undef TARGET_OPTION_FUNCTION_VERSIONS
22945 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
22946
22947 #undef TARGET_CAN_INLINE_P
22948 #define TARGET_CAN_INLINE_P ix86_can_inline_p
22949
22950 #undef TARGET_LEGITIMATE_ADDRESS_P
22951 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
22952
22953 #undef TARGET_REGISTER_PRIORITY
22954 #define TARGET_REGISTER_PRIORITY ix86_register_priority
22955
22956 #undef TARGET_REGISTER_USAGE_LEVELING_P
22957 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
22958
22959 #undef TARGET_LEGITIMATE_CONSTANT_P
22960 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
22961
22962 #undef TARGET_COMPUTE_FRAME_LAYOUT
22963 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
22964
22965 #undef TARGET_FRAME_POINTER_REQUIRED
22966 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
22967
22968 #undef TARGET_CAN_ELIMINATE
22969 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
22970
22971 #undef TARGET_EXTRA_LIVE_ON_ENTRY
22972 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
22973
22974 #undef TARGET_ASM_CODE_END
22975 #define TARGET_ASM_CODE_END ix86_code_end
22976
22977 #undef TARGET_CONDITIONAL_REGISTER_USAGE
22978 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
22979
22980 #undef TARGET_CANONICALIZE_COMPARISON
22981 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
22982
22983 #undef TARGET_LOOP_UNROLL_ADJUST
22984 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
22985
22986 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
22987 #undef TARGET_SPILL_CLASS
22988 #define TARGET_SPILL_CLASS ix86_spill_class
22989
22990 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
22991 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
22992 ix86_simd_clone_compute_vecsize_and_simdlen
22993
22994 #undef TARGET_SIMD_CLONE_ADJUST
22995 #define TARGET_SIMD_CLONE_ADJUST \
22996 ix86_simd_clone_adjust
22997
22998 #undef TARGET_SIMD_CLONE_USABLE
22999 #define TARGET_SIMD_CLONE_USABLE \
23000 ix86_simd_clone_usable
23001
23002 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
23003 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
23004 ix86_float_exceptions_rounding_supported_p
23005
23006 #undef TARGET_MODE_EMIT
23007 #define TARGET_MODE_EMIT ix86_emit_mode_set
23008
23009 #undef TARGET_MODE_NEEDED
23010 #define TARGET_MODE_NEEDED ix86_mode_needed
23011
23012 #undef TARGET_MODE_AFTER
23013 #define TARGET_MODE_AFTER ix86_mode_after
23014
23015 #undef TARGET_MODE_ENTRY
23016 #define TARGET_MODE_ENTRY ix86_mode_entry
23017
23018 #undef TARGET_MODE_EXIT
23019 #define TARGET_MODE_EXIT ix86_mode_exit
23020
23021 #undef TARGET_MODE_PRIORITY
23022 #define TARGET_MODE_PRIORITY ix86_mode_priority
23023
23024 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
23025 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
23026
23027 #undef TARGET_OFFLOAD_OPTIONS
23028 #define TARGET_OFFLOAD_OPTIONS \
23029 ix86_offload_options
23030
23031 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
23032 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
23033
23034 #undef TARGET_OPTAB_SUPPORTED_P
23035 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
23036
23037 #undef TARGET_HARD_REGNO_SCRATCH_OK
23038 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
23039
23040 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
23041 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
23042
23043 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
23044 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
23045
23046 #undef TARGET_INIT_LIBFUNCS
23047 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
23048
23049 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
23050 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
23051
23052 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
23053 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
23054
23055 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
23056 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
23057
23058 #undef TARGET_HARD_REGNO_NREGS
23059 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
23060 #undef TARGET_HARD_REGNO_MODE_OK
23061 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
23062
23063 #undef TARGET_MODES_TIEABLE_P
23064 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
23065
23066 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
23067 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
23068 ix86_hard_regno_call_part_clobbered
23069
23070 #undef TARGET_CAN_CHANGE_MODE_CLASS
23071 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
23072
23073 #undef TARGET_STATIC_RTX_ALIGNMENT
23074 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
23075 #undef TARGET_CONSTANT_ALIGNMENT
23076 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
23077
23078 #undef TARGET_EMPTY_RECORD_P
23079 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
23080
23081 #undef TARGET_WARN_PARAMETER_PASSING_ABI
23082 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
23083
23084 #undef TARGET_GET_MULTILIB_ABI_NAME
23085 #define TARGET_GET_MULTILIB_ABI_NAME \
23086 ix86_get_multilib_abi_name
23087
23088 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
23089 {
23090 #ifdef OPTION_GLIBC
23091 if (OPTION_GLIBC)
23092 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
23093 else
23094 return false;
23095 #else
23096 return false;
23097 #endif
23098 }
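
/* Editor's note: an illustrative (hypothetical) use of the hook above --
   middle-end code that wants to know whether the C library's mempcpy is
   worth calling could ask roughly:

     if (targetm.libc_has_fast_function (BUILT_IN_MEMPCPY))
       ... expand the copy as a mempcpy call ...

   i.e. the i386 port advertises a fast mempcpy only when targeting glibc,
   and reports false for every other builtin.  */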
23099
23100 #undef TARGET_LIBC_HAS_FAST_FUNCTION
23101 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
23102
23103 #if CHECKING_P
23104 #undef TARGET_RUN_TARGET_SELFTESTS
23105 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
23106 #endif /* #if CHECKING_P */
23107
23108 struct gcc_target targetm = TARGET_INITIALIZER;
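
/* Editor's note: the rest of the compiler reaches the hooks defined above
   through this targetm object; for example, argument-passing code in the
   middle end calls targetm.calls.function_arg (...), which resolves to
   ix86_function_arg via the TARGET_FUNCTION_ARG #define above.  This is a
   descriptive sketch of the dispatch mechanism, not code from the file.  */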
23109 \f
23110 #include "gt-i386.h"