/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "tree-pass.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "tree-iterator.h"
#include "tree-chkp.h"
#include "case-cfn-macros.h"
#include "regrename.h"
#include "fold-const-call.h"
#include "tree-ssanames.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"

/* This file should be included last.  */
#include "target-def.h"

#include "x86-tune-costs.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static bool ix86_save_reg (unsigned int, bool, bool);
static bool ix86_function_naked (const_tree);
static bool ix86_notrack_prefixed_insn_p (rtx);
static void ix86_emit_restore_reg_using_pop (rtx);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
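
/* Illustrative note (added commentary, not from the original file): the
   mult/divide cost tables indexed by MODE_INDEX have five slots, so a
   multiply cost would be looked up roughly as

     cost->mult_init[MODE_INDEX (mode)]

   with QImode..DImode in slots 0-3 and everything else (e.g. TImode)
   falling through to slot 4.  The field name "mult_init" is taken from the
   processor_costs structure and is given here only as an example.  */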

/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1U<<PROCESSOR_I386)
#define m_486 (1U<<PROCESSOR_I486)
#define m_PENT (1U<<PROCESSOR_PENTIUM)
#define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
#define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1U<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1U<<PROCESSOR_CORE2)
#define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (1U<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
#define m_BONNELL (1U<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
#define m_KNL (1U<<PROCESSOR_KNL)
#define m_KNM (1U<<PROCESSOR_KNM)
#define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
#define m_CANNONLAKE (1U<<PROCESSOR_CANNONLAKE)
#define m_INTEL (1U<<PROCESSOR_INTEL)

#define m_GEODE (1U<<PROCESSOR_GEODE)
#define m_K6 (1U<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1U<<PROCESSOR_K8)
#define m_ATHLON (1U<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1U<<PROCESSOR_BDVER1)
#define m_BDVER2 (1U<<PROCESSOR_BDVER2)
#define m_BDVER3 (1U<<PROCESSOR_BDVER3)
#define m_BDVER4 (1U<<PROCESSOR_BDVER4)
#define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
#define m_BTVER1 (1U<<PROCESSOR_BTVER1)
#define m_BTVER2 (1U<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
			| m_ZNVER1)

#define m_GENERIC (1U<<PROCESSOR_GENERIC)
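
/* Note (added commentary): these masks are indexed by enum processor_type,
   so a tuning's selector below can be tested against the active tuning
   with a simple bit test, e.g.

     initial_ix86_tune_features[i] & (1U << ix86_tune)

   which is how ix86_tune_features[] is populated at option-processing
   time.  */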

const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
#undef DEF_TUNE
};

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
#undef DEF_TUNE
};
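
/* Sketch of the x-macro expansion above (the DEF_TUNE line is illustrative,
   not quoted from x86-tune.def): an entry such as

     DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC)

   contributes the string "foo" to ix86_tune_feature_names[] and the mask
   (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features[], keeping the
   two tables in sync by construction.  */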

/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  /* Mask registers.  */
  MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  /* MPX bound registers */
  BND_REGS, BND_REGS, BND_REGS, BND_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31 */
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
};

/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
  67, 68, 69, 70, 71, 72, 73, 74,	/* AVX-512 registers 16-23 */
  75, 76, 77, 78, 79, 80, 81, 82,	/* AVX-512 registers 24-31 */
  118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
  126, 127, 128, 129,			/* bound registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31 */
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
};

/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};

/* Additional registers that are clobbered by SYSV calls.  */

#define NUM_X86_64_MS_CLOBBERED_REGS 12
static int const x86_64_ms_sysv_extra_clobbered_registers
		 [NUM_X86_64_MS_CLOBBERED_REGS] =
{
  SI_REG, DI_REG,
  XMM6_REG, XMM7_REG,
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
};

enum xlogue_stub {
  XLOGUE_STUB_SAVE,
  XLOGUE_STUB_RESTORE,
  XLOGUE_STUB_RESTORE_TAIL,
  XLOGUE_STUB_SAVE_HFP,
  XLOGUE_STUB_RESTORE_HFP,
  XLOGUE_STUB_RESTORE_HFP_TAIL,

  XLOGUE_STUB_COUNT
};

enum xlogue_stub_sets {
  XLOGUE_SET_ALIGNED,
  XLOGUE_SET_ALIGNED_PLUS_8,
  XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
  XLOGUE_SET_HFP_ALIGNED_PLUS_8,

  XLOGUE_SET_COUNT
};

/* Register save/restore layout used by out-of-line stubs.  */
class xlogue_layout {
public:
  struct reginfo
  {
    unsigned regno;
    HOST_WIDE_INT offset;	/* Offset used by stub base pointer (rax or
				   rsi) to where each register is stored.  */
  };

  unsigned get_nregs () const			{return m_nregs;}
  HOST_WIDE_INT get_stack_align_off_in () const	{return m_stack_align_off_in;}

  const reginfo &get_reginfo (unsigned reg) const
  {
    gcc_assert (reg < m_nregs);
    return m_regs[reg];
  }

  static const char *get_stub_name (enum xlogue_stub stub,
				    unsigned n_extra_args);

  /* Returns an rtx for the stub's symbol based upon
       1.) the specified stub (save, restore or restore_ret) and
       2.) the value of cfun->machine->call_ms2sysv_extra_regs and
       3.) whether or not stack alignment is being performed.  */
  static rtx get_stub_rtx (enum xlogue_stub stub);

  /* Returns the amount of stack space (including padding) that the stub
     needs to store registers based upon data in the machine_function.  */
  HOST_WIDE_INT get_stack_space_used () const
  {
    const struct machine_function *m = cfun->machine;
    unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;

    gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
    return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
  }

  /* Returns the offset for the base pointer used by the stub.  */
  HOST_WIDE_INT get_stub_ptr_offset () const
  {
    return STUB_INDEX_OFFSET + m_stack_align_off_in;
  }

  static const struct xlogue_layout &get_instance ();
  static unsigned count_stub_managed_regs ();
  static bool is_stub_managed_reg (unsigned regno, unsigned count);

  static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
  static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
  static const unsigned MAX_REGS = 18;
  static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
  static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
  static const unsigned STUB_NAME_MAX_LEN = 20;
  static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
  static const unsigned REG_ORDER[MAX_REGS];
  static const unsigned REG_ORDER_REALIGN[MAX_REGS];

private:
  xlogue_layout ();
  xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
  xlogue_layout (const xlogue_layout &);

  /* True if hard frame pointer is used.  */
  bool m_hfp;

  /* Max number of register this layout manages.  */
  unsigned m_nregs;

  /* Incoming offset from 16-byte alignment.  */
  HOST_WIDE_INT m_stack_align_off_in;

  /* Register order and offsets.  */
  struct reginfo m_regs[MAX_REGS];

  /* Lazy-inited cache of symbol names for stubs.  */
  static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
			  [STUB_NAME_MAX_LEN];

  static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
};

const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};

const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    Offset:				realigned or	aligned + 8
    Register	   aligned	aligned + 8	aligned w/HFP	w/HFP  */
  XMM15_REG,	/* 0x10		0x18		0x10		0x18  */
  XMM14_REG,	/* 0x20		0x28		0x20		0x28  */
  XMM13_REG,	/* 0x30		0x38		0x30		0x38  */
  XMM12_REG,	/* 0x40		0x48		0x40		0x48  */
  XMM11_REG,	/* 0x50		0x58		0x50		0x58  */
  XMM10_REG,	/* 0x60		0x68		0x60		0x68  */
  XMM9_REG,	/* 0x70		0x78		0x70		0x78  */
  XMM8_REG,	/* 0x80		0x88		0x80		0x88  */
  XMM7_REG,	/* 0x90		0x98		0x90		0x98  */
  XMM6_REG,	/* 0xa0		0xa8		0xa0		0xa8  */
  SI_REG,	/* 0xa8		0xb0		0xa8		0xb0  */
  DI_REG,	/* 0xb0		0xb8		0xb0		0xb8  */
  BX_REG,	/* 0xb8		0xc0		0xb8		0xc0  */
  BP_REG,	/* 0xc0		0xc8		N/A		N/A   */
  R12_REG,	/* 0xc8		0xd0		0xc0		0xc8  */
  R13_REG,	/* 0xd0		0xd8		0xc8		0xd0  */
  R14_REG,	/* 0xd8		0xe0		0xd0		0xd8  */
  R15_REG,	/* 0xe0		0xe8		0xd8		0xe0  */
};

/* Instantiate static const values.  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				[STUB_NAME_MAX_LEN];

/* Instantiates all xlogue_layout instances.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};

/* Return an appropriate const instance of xlogue_layout based upon values
   in cfun->machine and crtl.  */
const struct xlogue_layout &
xlogue_layout::get_instance ()
{
  enum xlogue_stub_sets stub_set;
  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;

  if (stack_realign_fp)
    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else if (frame_pointer_needed)
    stub_set = aligned_plus_8
	       ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
	       : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else
    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;

  return s_instances[stub_set];
}

/* Determine how many clobbered registers can be saved by the stub.
   Returns the count of registers the stub will save and restore.  */
unsigned
xlogue_layout::count_stub_managed_regs ()
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i, count;
  unsigned regno;

  for (count = i = MIN_REGS; i < MAX_REGS; ++i)
    {
      regno = REG_ORDER[i];
      if (regno == BP_REG && hfp)
	continue;
      if (!ix86_save_reg (regno, false, false))
	break;
      ++count;
    }
  return count;
}

/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}

/* Constructor for xlogue_layout.  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp), m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno    = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}

const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init each string.  */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
    }

  return name;
}

/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}

/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
static rtx (*ix86_gen_clzero) (rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init;

static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
				 const char *, const char *, enum fpmath_unit,
				 bool);
static void ix86_function_specific_save (struct cl_target_option *,
					 struct gcc_options *opts);
static void ix86_function_specific_restore (struct gcc_options *opts,
					    struct cl_target_option *);
static void ix86_function_specific_post_stream_in (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *,
						 struct gcc_options *,
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);

#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (combined_fn, tree, tree);

static tree ix86_veclibabi_svml (combined_fn, tree, tree);
static tree ix86_veclibabi_acml (combined_fn, tree, tree);

/* Processor target table, indexed by processor number */
struct ptt
{
  const char *const name;			/* processor name  */
  const struct processor_costs *cost;		/* Processor costs */
  const int align_loop;				/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

/* This table must be in sync with enum processor_type in i386.h.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {"generic", &generic_cost, 16, 10, 16, 10, 16},
  {"i386", &i386_cost, 4, 3, 4, 3, 4},
  {"i486", &i486_cost, 16, 15, 16, 15, 16},
  {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
  {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
  {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
  {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
  {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
  {"core2", &core_cost, 16, 10, 16, 10, 16},
  {"nehalem", &core_cost, 16, 10, 16, 10, 16},
  {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
  {"haswell", &core_cost, 16, 10, 16, 10, 16},
  {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
  {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
  {"knl", &slm_cost, 16, 15, 16, 7, 16},
  {"knm", &slm_cost, 16, 15, 16, 7, 16},
  {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
  {"cannonlake", &core_cost, 16, 10, 16, 10, 16},
  {"intel", &intel_cost, 16, 15, 16, 7, 16},
  {"geode", &geode_cost, 0, 0, 0, 0, 0},
  {"k6", &k6_cost, 32, 7, 32, 7, 32},
  {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
  {"k8", &k8_cost, 16, 7, 16, 7, 16},
  {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
  {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
  {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
  {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
  {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
  {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
  {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
  {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
};

static int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit or 512bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  return 0;
}

/* Return 1 if INSN uses or defines a hard register.
   Hard register uses in a memory address are ignored.
   Clobbers and flags definitions are ignored.  */

static bool
has_non_address_hard_reg (rtx_insn *insn)
{
  df_ref ref;

  FOR_EACH_INSN_DEF (ref, insn)
    if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
	&& DF_REF_REGNO (ref) != FLAGS_REG)
      return true;

  FOR_EACH_INSN_USE (ref, insn)
    if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
      return true;

  return false;
}

/* Check if comparison INSN may be transformed
   into vector comparison.  Currently we transform
   zero checks only which look like:

   (set (reg:CCZ 17 flags)
	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			     (subreg:SI (reg:DI x) 0))
		     (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn)
{
  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}
981 dimode_scalar_to_vector_candidate_p (rtx_insn
*insn
)
983 rtx def_set
= single_set (insn
);
988 if (has_non_address_hard_reg (insn
))
991 rtx src
= SET_SRC (def_set
);
992 rtx dst
= SET_DEST (def_set
);
994 if (GET_CODE (src
) == COMPARE
)
995 return convertible_comparison_p (insn
);
997 /* We are interested in DImode promotion only. */
998 if ((GET_MODE (src
) != DImode
999 && !CONST_INT_P (src
))
1000 || GET_MODE (dst
) != DImode
)
1003 if (!REG_P (dst
) && !MEM_P (dst
))
1006 switch (GET_CODE (src
))
1009 if (!TARGET_AVX512VL
)
1015 if (!REG_P (XEXP (src
, 1))
1016 && (!SUBREG_P (XEXP (src
, 1))
1017 || SUBREG_BYTE (XEXP (src
, 1)) != 0
1018 || !REG_P (SUBREG_REG (XEXP (src
, 1))))
1019 && (!CONST_INT_P (XEXP (src
, 1))
1020 || !IN_RANGE (INTVAL (XEXP (src
, 1)), 0, 63)))
1023 if (GET_MODE (XEXP (src
, 1)) != QImode
1024 && !CONST_INT_P (XEXP (src
, 1)))
1033 if (!REG_P (XEXP (src
, 1))
1034 && !MEM_P (XEXP (src
, 1))
1035 && !CONST_INT_P (XEXP (src
, 1)))
1038 if (GET_MODE (XEXP (src
, 1)) != DImode
1039 && !CONST_INT_P (XEXP (src
, 1)))
1058 if (!REG_P (XEXP (src
, 0))
1059 && !MEM_P (XEXP (src
, 0))
1060 && !CONST_INT_P (XEXP (src
, 0))
1061 /* Check for andnot case. */
1062 && (GET_CODE (src
) != AND
1063 || GET_CODE (XEXP (src
, 0)) != NOT
1064 || !REG_P (XEXP (XEXP (src
, 0), 0))))
1067 if (GET_MODE (XEXP (src
, 0)) != DImode
1068 && !CONST_INT_P (XEXP (src
, 0)))

/* The TImode version of scalar_to_vector_candidate_p.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode load and store are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for store.  Memory must be aligned or unaligned store
	 is optimal.  Only support store from register, standard SSE
	 constant or CONST_WIDE_INT generated from piecewise store.

	 ??? Verify performance impact before enabling CONST_INT for
	 __int128 store.  */
      if (misaligned_operand (dst, TImode)
	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	return false;

      switch (GET_CODE (src))
	{
	default:
	  return false;

	case REG:
	case CONST_WIDE_INT:
	  return true;

	case CONST_INT:
	  return standard_sse_constant_p (src, TImode);
	}
    }
  else if (MEM_P (src))
    {
      /* Check for load.  Memory must be aligned or unaligned load is
	 optimal.  */
      return (REG_P (dst)
	      && (!misaligned_operand (src, TImode)
		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}

/* Return 1 if INSN may be converted into vector
   instruction.  */

static bool
scalar_to_vector_candidate_p (rtx_insn *insn)
{
  if (TARGET_64BIT)
    return timode_scalar_to_vector_candidate_p (insn);
  else
    return dimode_scalar_to_vector_candidate_p (insn);
}

/* The DImode version of remove_non_convertible_regs.  */

static void
dimode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx reg = SET_DEST (def_set);

      if (!REG_P (reg)
	  || bitmap_bit_p (regs, REGNO (reg))
	  || HARD_REGISTER_P (reg))
	continue;

      for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
	   def;
	   def = DF_REF_NEXT_REG (def))
	{
	  if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "r%d has non convertible definition in insn %d\n",
			 REGNO (reg), DF_REF_INSN_UID (def));

	      bitmap_set_bit (regs, REGNO (reg));
	      break;
	    }
	}
    }

  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }
    }

  BITMAP_FREE (regs);
}

/* For a register REGNO, scan instructions for its defs and uses.
   Put REGNO in REGS if a def or use isn't in CANDIDATES.  */

static void
timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
				   unsigned int regno)
{
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    {
      if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible def in insn %d\n",
		     regno, DF_REF_INSN_UID (def));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }

  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Debug instructions are skipped.  */
      if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible use in insn %d\n",
		     regno, DF_REF_INSN_UID (ref));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }
}
1242 timode_remove_non_convertible_regs (bitmap candidates
)
1246 bitmap regs
= BITMAP_ALLOC (NULL
);
1248 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, id
, bi
)
1250 rtx def_set
= single_set (DF_INSN_UID_GET (id
)->insn
);
1251 rtx dest
= SET_DEST (def_set
);
1252 rtx src
= SET_SRC (def_set
);
1255 || bitmap_bit_p (regs
, REGNO (dest
))
1256 || HARD_REGISTER_P (dest
))
1258 || bitmap_bit_p (regs
, REGNO (src
))
1259 || HARD_REGISTER_P (src
)))
1263 timode_check_non_convertible_regs (candidates
, regs
,
1267 timode_check_non_convertible_regs (candidates
, regs
,
1271 EXECUTE_IF_SET_IN_BITMAP (regs
, 0, id
, bi
)
1273 for (df_ref def
= DF_REG_DEF_CHAIN (id
);
1275 def
= DF_REF_NEXT_REG (def
))
1276 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
1279 fprintf (dump_file
, "Removing insn %d from candidates list\n",
1280 DF_REF_INSN_UID (def
));
1282 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (def
));
1285 for (df_ref ref
= DF_REG_USE_CHAIN (id
);
1287 ref
= DF_REF_NEXT_REG (ref
))
1288 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)))
1291 fprintf (dump_file
, "Removing insn %d from candidates list\n",
1292 DF_REF_INSN_UID (ref
));
1294 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (ref
));

/* For a given bitmap of insn UIDs, scan all instructions and remove an
   insn from CANDIDATES if it has both convertible and non-convertible
   definitions.

   All insns in a bitmap are conversion candidates according to
   scalar_to_vector_candidate_p.  Currently it implies all insns
   are single_set.  */

static void
remove_non_convertible_regs (bitmap candidates)
{
  if (TARGET_64BIT)
    timode_remove_non_convertible_regs (candidates);
  else
    dimode_remove_non_convertible_regs (candidates);
}

class scalar_chain
{
 public:
  scalar_chain ();
  virtual ~scalar_chain ();

  static unsigned max_id;

  /* ID of a chain.  */
  unsigned int chain_id;
  /* A queue of instructions to be included into a chain.  */
  bitmap queue;
  /* Instructions included into a chain.  */
  bitmap insns;
  /* All registers defined by a chain.  */
  bitmap defs;
  /* Registers used in both vector and scalar modes.  */
  bitmap defs_conv;

  void build (bitmap candidates, unsigned insn_uid);
  virtual int compute_convert_gain () = 0;
  int convert ();

 protected:
  void add_to_queue (unsigned insn_uid);
  void emit_conversion_insns (rtx insns, rtx_insn *pos);

 private:
  void add_insn (bitmap candidates, unsigned insn_uid);
  void analyze_register_chain (bitmap candidates, df_ref ref);
  virtual void mark_dual_mode_def (df_ref def) = 0;
  virtual void convert_insn (rtx_insn *insn) = 0;
  virtual void convert_registers () = 0;
};

class dimode_scalar_chain : public scalar_chain
{
 public:
  int compute_convert_gain ();
 private:
  void mark_dual_mode_def (df_ref def);
  rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
  void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
  void convert_insn (rtx_insn *insn);
  void convert_op (rtx *op, rtx_insn *insn);
  void convert_reg (unsigned regno);
  void make_vector_copies (unsigned regno);
  void convert_registers ();
  int vector_const_cost (rtx exp);
};

class timode_scalar_chain : public scalar_chain
{
 public:
  /* Converting from TImode to V1TImode is always faster.  */
  int compute_convert_gain () { return 1; }

 private:
  void mark_dual_mode_def (df_ref def);
  void fix_debug_reg_uses (rtx reg);
  void convert_insn (rtx_insn *insn);
  /* We don't convert registers to a different size.  */
  void convert_registers () {}
};

unsigned scalar_chain::max_id = 0;
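
/* Usage sketch (added commentary): the STV pass drives these classes
   roughly as

     chain->build (candidates, uid);
     if (chain->compute_convert_gain () > 0)
       chain->convert ();

   where the chain object is a dimode_scalar_chain on 32-bit targets and a
   timode_scalar_chain on 64-bit ones.  */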

/* Initialize new chain.  */

scalar_chain::scalar_chain ()
{
  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}

/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}

/* Add instruction into chains' queue.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
	     insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}

/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
dimode_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
    return;

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);

  bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
}

/* For TImode conversion, it is unused.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}

/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}

/* Add instruction into a chain.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  df_ref ref;
  df_ref def;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
	   def;
	   def = DF_REF_NEXT_REG (def))
	analyze_register_chain (candidates, def);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}

/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}

/* Return a cost of building a vector constant
   instead of using a scalar one.  */

int
dimode_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  if (standard_sse_constant_p (exp, V2DImode))
    return COSTS_N_INSNS (1);
  return ix86_cost->sse_load[1];
}

/* Compute a gain for chain conversion.  */

int
dimode_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);

      if (REG_P (src) && REG_P (dst))
	gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
      else if (MEM_P (src) && REG_P (dst))
	gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  if (CONST_INT_P (XEXP (src, 0)))
	    gain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    {
	      gain += ix86_cost->shift_const;
	      if (INTVAL (XEXP (src, 1)) >= 32)
		gain -= COSTS_N_INSNS (1);
	    }
	  else
	    /* Additional gain for omitting two CMOVs.  */
	    gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  gain += ix86_cost->add;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    gain += 2 * ix86_cost->add;

	  if (CONST_INT_P (XEXP (src, 0)))
	    gain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    gain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	gain += ix86_cost->add - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    gain += COSTS_N_INSNS (2);
	  else if (MEM_P (dst))
	    gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
	  gain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
    cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}
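
/* Worked example (added commentary): for a chain consisting of a DImode
   load, an AND with a register and a store, the gain above is
   (2*int_load[2] - sse_load[1]) + add + (2*int_store[2] - sse_store[1]),
   and each dual-mode register subtracts mmxsse_to_integer per definition;
   the chain is only converted when the resulting total is positive.  */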

/* Replace REG in X with a V2DI subreg of NEW_REG.  */

rtx
dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
{
  if (x == reg)
    return gen_rtx_SUBREG (V2DImode, new_reg, 0);

  const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
  int i, j;
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
						   reg, new_reg);
    }

  return x;
}

/* Replace REG in INSN with a V2DI subreg of NEW_REG.  */

void
dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
						  rtx reg, rtx new_reg)
{
  replace_with_subreg (single_set (insn), reg, new_reg);
}

/* Insert generated conversion instruction sequence INSNS
   after instruction AFTER.  New BB may be required in case
   instruction has EH region attached.  */

void
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
{
  if (!control_flow_insn_p (after))
    {
      emit_insn_after (insns, after);
      return;
    }

  basic_block bb = BLOCK_FOR_INSN (after);
  edge e = find_fallthru_edge (bb->succs);
  gcc_assert (e);

  basic_block new_bb = split_edge (e);
  emit_insn_after (insns, BB_HEAD (new_bb));
}

/* Make vector copies for all register REGNO definitions
   and replace its uses in a chain.  */

void
dimode_scalar_chain::make_vector_copies (unsigned regno)
{
  rtx reg = regno_reg_rtx[regno];
  rtx vreg = gen_reg_rtx (DImode);
  bool count_reg = false;
  df_ref ref;

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	df_ref use;

	/* Detect the count register of a shift instruction.  */
	for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
	  if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
	    {
	      rtx_insn *insn = DF_REF_INSN (use);
	      rtx def_set = single_set (insn);

	      gcc_assert (def_set);

	      rtx src = SET_SRC (def_set);

	      if ((GET_CODE (src) == ASHIFT
		   || GET_CODE (src) == ASHIFTRT
		   || GET_CODE (src) == LSHIFTRT)
		  && !CONST_INT_P (XEXP (src, 1))
		  && reg_or_subregno (XEXP (src, 1)) == regno)
		count_reg = true;
	    }

	start_sequence ();
	if (count_reg)
	  {
	    rtx qreg = gen_lowpart (QImode, reg);
	    rtx tmp = gen_reg_rtx (SImode);

	    if (TARGET_ZERO_EXTEND_WITH_AND
		&& optimize_function_for_speed_p (cfun))
	      {
		emit_move_insn (tmp, const0_rtx);
		emit_insn (gen_movstrictqi
			   (gen_lowpart (QImode, tmp), qreg));
	      }
	    else
	      emit_insn (gen_rtx_SET
			 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));

	    if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	      {
		rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
		emit_move_insn (slot, tmp);
		tmp = copy_rtx (slot);
	      }

	    emit_insn (gen_zero_extendsidi2 (vreg, tmp));
	  }
	else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	  {
	    rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
	    emit_move_insn (adjust_address (tmp, SImode, 0),
			    gen_rtx_SUBREG (SImode, reg, 0));
	    emit_move_insn (adjust_address (tmp, SImode, 4),
			    gen_rtx_SUBREG (SImode, reg, 4));
	    emit_move_insn (vreg, tmp);
	  }
	else if (TARGET_SSE4_1)
	  {
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 0)));
	    emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					  gen_rtx_SUBREG (V4SImode, vreg, 0),
					  gen_rtx_SUBREG (SImode, reg, 4),
					  GEN_INT (2)));
	  }
	else
	  {
	    rtx tmp = gen_reg_rtx (DImode);
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 0)));
	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
					CONST0_RTX (V4SImode),
					gen_rtx_SUBREG (SImode, reg, 4)));
	    emit_insn (gen_vec_interleave_lowv4si
		       (gen_rtx_SUBREG (V4SImode, vreg, 0),
			gen_rtx_SUBREG (V4SImode, vreg, 0),
			gen_rtx_SUBREG (V4SImode, tmp, 0)));
	  }
	rtx_insn *seq = get_insns ();
	end_sequence ();
	rtx_insn *insn = DF_REF_INSN (ref);
	emit_conversion_insns (seq, insn);

	if (dump_file)
	  fprintf (dump_file,
		   "  Copied r%d to a vector register r%d for insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }

  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	rtx_insn *insn = DF_REF_INSN (ref);
	if (count_reg)
	  {
	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);

	    if ((GET_CODE (src) == ASHIFT
		 || GET_CODE (src) == ASHIFTRT
		 || GET_CODE (src) == LSHIFTRT)
		&& !CONST_INT_P (XEXP (src, 1))
		&& reg_or_subregno (XEXP (src, 1)) == regno)
	      XEXP (src, 1) = vreg;
	  }
	else
	  replace_with_subreg_in_insn (insn, reg, vreg);

	if (dump_file)
	  fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }
}

/* Convert all definitions of register REGNO
   and fix its uses.  Scalar copies may be created
   in case register is used in not convertible insn.  */

void
dimode_scalar_chain::convert_reg (unsigned regno)
{
  bool scalar_copy = bitmap_bit_p (defs_conv, regno);
  rtx reg = regno_reg_rtx[regno];
  rtx scopy = NULL_RTX;
  df_ref ref;
  bitmap conv;

  conv = BITMAP_ALLOC (NULL);
  bitmap_copy (conv, insns);

  if (scalar_copy)
    scopy = gen_reg_rtx (DImode);

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx reg = DF_REF_REG (ref);

      if (!MEM_P (src))
	{
	  replace_with_subreg_in_insn (insn, reg, reg);
	  bitmap_clear_bit (conv, INSN_UID (insn));
	}

      if (scalar_copy)
	{
	  start_sequence ();
	  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
	    {
	      rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
	      emit_move_insn (tmp, reg);
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
			      adjust_address (tmp, SImode, 0));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
			      adjust_address (tmp, SImode, 4));
	    }
	  else if (TARGET_SSE4_1)
	    {
	      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	      emit_insn
		(gen_rtx_SET
		 (gen_rtx_SUBREG (SImode, scopy, 0),
		  gen_rtx_VEC_SELECT (SImode,
				      gen_rtx_SUBREG (V4SImode, reg, 0),
				      tmp)));

	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	      emit_insn
		(gen_rtx_SET
		 (gen_rtx_SUBREG (SImode, scopy, 4),
		  gen_rtx_VEC_SELECT (SImode,
				      gen_rtx_SUBREG (V4SImode, reg, 0),
				      tmp)));
	    }
	  else
	    {
	      rtx vcopy = gen_reg_rtx (V2DImode);
	      emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
			      gen_rtx_SUBREG (SImode, vcopy, 0));
	      emit_move_insn (vcopy,
			      gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
			      gen_rtx_SUBREG (SImode, vcopy, 0));
	    }
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_conversion_insns (seq, insn);

	  if (dump_file)
	    fprintf (dump_file,
		     "  Copied r%d to a scalar register r%d for insn %d\n",
		     regno, REGNO (scopy), INSN_UID (insn));
	}
    }

  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
	  {
	    rtx_insn *insn = DF_REF_INSN (ref);

	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);
	    rtx dst = SET_DEST (def_set);

	    if ((GET_CODE (src) == ASHIFT
		 || GET_CODE (src) == ASHIFTRT
		 || GET_CODE (src) == LSHIFTRT)
		&& !CONST_INT_P (XEXP (src, 1))
		&& reg_or_subregno (XEXP (src, 1)) == regno)
	      {
		rtx tmp2 = gen_reg_rtx (V2DImode);

		start_sequence ();

		if (TARGET_SSE4_1)
		  emit_insn (gen_sse4_1_zero_extendv2qiv2di2
			     (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
		else
		  {
		    rtx vec_cst
		      = gen_rtx_CONST_VECTOR (V2DImode,
					      gen_rtvec (2, GEN_INT (0xff),
							 const0_rtx));
		    vec_cst
		      = validize_mem (force_const_mem (V2DImode, vec_cst));

		    emit_insn (gen_rtx_SET
			       (tmp2,
				gen_rtx_AND (V2DImode,
					     gen_rtx_SUBREG (V2DImode, reg, 0),
					     vec_cst)));
		  }
		rtx_insn *seq = get_insns ();
		end_sequence ();

		emit_insn_before (seq, insn);

		XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
	      }
	    else if (!MEM_P (dst) || !REG_P (src))
	      replace_with_subreg_in_insn (insn, reg, reg);

	    bitmap_clear_bit (conv, INSN_UID (insn));
	  }
      }
    /* Skip debug insns and uninitialized uses.  */
    else if (DF_REF_CHAIN (ref)
	     && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
      {
	replace_rtx (DF_REF_INSN (ref), reg, scopy);
	df_insn_rescan (DF_REF_INSN (ref));
      }

  BITMAP_FREE (conv);
}

/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, V2DImode);
    }
  else if (MEM_P (*op))
    {
      rtx tmp = gen_reg_rtx (DImode);

      emit_insn_before (gen_move_insn (tmp, *op), insn);
      *op = gen_rtx_SUBREG (V2DImode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      /* We may have not converted register usage in case
	 this register has no definition.  Otherwise it
	 should be converted in convert_reg.  */
      df_ref ref;
      FOR_EACH_INSN_USE (ref, insn)
	if (DF_REF_REGNO (ref) == REGNO (*op))
	  {
	    gcc_assert (!DF_REF_CHAIN (ref));
	    break;
	  }
      *op = gen_rtx_SUBREG (V2DImode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (V2DImode);
      else
	vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
					gen_rtvec (2, *op, const0_rtx));

      if (!standard_sse_constant_p (vec_cst, V2DImode))
	{
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == V2DImode);
    }
}

/* Convert INSN to vector mode.  */

void
dimode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (DImode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
    }

  switch (GET_CODE (src))
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, V2DImode);
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      convert_op (&XEXP (src, 0), insn);
      convert_op (&XEXP (src, 1), insn);
      PUT_MODE (src, V2DImode);
      break;

    case NEG:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (V2DImode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
      src = gen_rtx_MINUS (V2DImode, subreg, src);
      break;

    case NOT:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (V2DImode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
      src = gen_rtx_XOR (V2DImode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == V2DImode);
      break;

    case COMPARE:
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
		  || (SUBREG_P (src) && GET_MODE (src) == V2DImode));

      if (REG_P (src))
	subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      else
	subreg = copy_rtx_if_shared (src);
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
					       copy_rtx_if_shared (src)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}
/* Fix uses of converted REG in debug insns.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}
/* Convert INSN from TImode to V1TImode.  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store 128-bit constant,
	     temporary register usage is required.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}
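/* Convert the chain's registers: definitions that belong to the chain
   are converted in place, while registers the chain uses but does not
   define get scalar-to-vector copies instead.  */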
void
dimode_scalar_chain::convert_registers ()
{
  bitmap_iterator bi;
  unsigned id;

  EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
    convert_reg (id);

  EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
    make_vector_copies (id);
}
/* Convert whole chain creating required register
   conversions and copies.  */

int
scalar_chain::convert ()
{
  bitmap_iterator bi;
  unsigned id;
  int converted_insns = 0;

  if (!dbg_cnt (stv_conversion))
    return 0;

  if (dump_file)
    fprintf (dump_file, "Converting chain #%d...\n", chain_id);

  convert_registers ();

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    {
      convert_insn (DF_INSN_UID_GET (id)->insn);
      converted_insns++;
    }

  return converted_insns;
}
/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.  */

static unsigned int
convert_scalars_to_vector ()
{
  basic_block bb;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  candidates = BITMAP_ALLOC (NULL);

  calculate_dominance_info (CDI_DOMINATORS);
  df_set_flags (DF_DEFER_INSN_RESCAN);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_md_add_problem ();
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n");

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, "  insn %d is marked as a candidate\n",
		       INSN_UID (insn));

	    bitmap_set_bit (candidates, INSN_UID (insn));
	  }
    }

  remove_non_convertible_regs (candidates);

  if (bitmap_empty_p (candidates))
    if (dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n");

  while (!bitmap_empty_p (candidates))
    {
      unsigned uid = bitmap_first_set_bit (candidates);
      scalar_chain *chain;

      if (TARGET_64BIT)
	chain = new timode_scalar_chain;
      else
	chain = new dimode_scalar_chain;

      /* Find instructions chain we want to convert to vector mode.
	 Check all uses and definitions to estimate all required
	 conversions.  */
      chain->build (candidates, uid);

      if (chain->compute_convert_gain () > 0)
	converted_insns += chain->convert ();
      else if (dump_file)
	fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		 chain->chain_id);

      delete chain;
    }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n", converted_insns);

  BITMAP_FREE (candidates);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;

      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}
const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return TARGET_AVX
	     && TARGET_VZEROUPPER && flag_expensive_optimizations
	     && !optimize_size;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_insert_vzeroupper ();
    }

}; // class pass_insert_vzeroupper
const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
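/* The STV pass is registered twice: one instance handles DImode chains
   on 32-bit targets, the other TImode chains on 64-bit targets.  The
   instances are distinguished by timode_p, set via set_pass_param below
   and checked against TARGET_64BIT in the gate.  */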
class pass_stv : public rtl_opt_pass
{
public:
  pass_stv (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_stv, ctxt),
      timode_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (timode_p == !!TARGET_64BIT
	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
    }

  virtual unsigned int execute (function *)
    {
      return convert_scalars_to_vector ();
    }

  opt_pass *clone ()
    {
      return new pass_stv (m_ctxt);
    }

  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      timode_p = param;
    }

private:
  bool timode_p;
}; // class pass_stv
rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}

rtl_opt_pass *
make_pass_stv (gcc::context *ctxt)
{
  return new pass_stv (ctxt);
}
/* Inserting ENDBRANCH instructions.  */

static unsigned int
rest_of_insert_endbranch (void)
{
  timevar_push (TV_MACH_DEP);

  rtx cet_eb;
  rtx_insn *insn;
  basic_block bb;

  /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
     absent among function attributes.  Later an optimization will be
     introduced to make analysis if an address of a static function is
     taken.  A static function whose address is not taken will get a
     nocf_check attribute.  This will allow to reduce the number of EB.  */

  if (!lookup_attribute ("nocf_check",
			 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
      && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
    {
      cet_eb = gen_nop_endbr ();

      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
      insn = BB_HEAD (bb);
      emit_insn_before (cet_eb, insn);
    }

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (INSN_P (insn) && GET_CODE (insn) == CALL_INSN)
	    {
	      if (find_reg_note (insn, REG_SETJMP, NULL) == NULL)
		continue;
	      /* Generate ENDBRANCH after a CALL that can return more than
		 once, i.e. after setjmp-like functions.  */

	      /* Skip notes and debug insns that must be next to the
		 call insn.  ??? This might skip a lot more than
		 that...  ??? Skipping barriers and emitting code
		 after them surely looks like a mistake; we probably
		 won't ever hit it, for we'll hit BB_END first.  */
	      rtx_insn *next_insn = insn;
	      while ((next_insn != BB_END (bb))
		     && (DEBUG_INSN_P (NEXT_INSN (next_insn))
			 || NOTE_P (NEXT_INSN (next_insn))
			 || BARRIER_P (NEXT_INSN (next_insn))))
		next_insn = NEXT_INSN (next_insn);

	      cet_eb = gen_nop_endbr ();
	      emit_insn_after_setloc (cet_eb, next_insn, INSN_LOCATION (insn));
	      continue;
	    }

	  if (INSN_P (insn) && JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check the jump is a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For the indirect jump find out all places it jumps and insert
		 ENDBRANCH there.  It should be done under a special flag to
		 control ENDBRANCH generation for switch stmts.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  cet_eb = gen_nop_endbr ();
		  emit_insn_after (cet_eb, insn);
		}
	    }

	  if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* TODO.  Check /s bit also.  */
	    {
	      cet_eb = gen_nop_endbr ();
	      emit_insn_after (cet_eb, insn);
	    }
	}
    }

  timevar_pop (TV_MACH_DEP);
  return 0;
}
const pass_data pass_data_insert_endbranch =
{
  RTL_PASS, /* type.  */
  "cet", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.  */
};

class pass_insert_endbranch : public rtl_opt_pass
{
public:
  pass_insert_endbranch (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return ((flag_cf_protection & CF_BRANCH) && TARGET_IBT);
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_insert_endbranch ();
    }

}; // class pass_insert_endbranch

rtl_opt_pass *
make_pass_insert_endbranch (gcc::context *ctxt)
{
  return new pass_insert_endbranch (ctxt);
}
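/* The "red zone" is the 128-byte area below the stack pointer that the
   x86-64 SysV ABI guarantees will not be clobbered by signal or
   interrupt handlers, so leaf functions can use it without adjusting
   the stack pointer.  The 64-bit MS ABI provides no such area.  */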
/* Return true if a red-zone is in use.  */

bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

char *
ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
		    int flags, int flags2,
		    const char *arch, const char *tune,
		    enum fpmath_unit fpmath, bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply other
     ISAs come first.  Target string will be displayed in the same order.  */
  static struct ix86_target_opts isa2_opts[] =
  {
    { "-mcx16", OPTION_MASK_ISA_CX16 },
    { "-mmpx", OPTION_MASK_ISA_MPX },
    { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 },
    { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI },
    { "-mvaes", OPTION_MASK_ISA_VAES },
    { "-mrdpid", OPTION_MASK_ISA_RDPID },
    { "-msgx", OPTION_MASK_ISA_SGX },
    { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
    { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
    { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
    { "-mibt", OPTION_MASK_ISA_IBT },
    { "-mshstk", OPTION_MASK_ISA_SHSTK }
  };
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ },
    { "-mgfni", OPTION_MASK_ISA_GFNI },
    { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
    { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
    { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
    { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
    { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
    { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
    { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
    { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
    { "-mavx512f", OPTION_MASK_ISA_AVX512F },
    { "-mavx2", OPTION_MASK_ISA_AVX2 },
    { "-mfma", OPTION_MASK_ISA_FMA },
    { "-mxop", OPTION_MASK_ISA_XOP },
    { "-mfma4", OPTION_MASK_ISA_FMA4 },
    { "-mf16c", OPTION_MASK_ISA_F16C },
    { "-mavx", OPTION_MASK_ISA_AVX },
    /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
    { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
    { "-msse4a", OPTION_MASK_ISA_SSE4A },
    { "-mssse3", OPTION_MASK_ISA_SSSE3 },
    { "-msse3", OPTION_MASK_ISA_SSE3 },
    { "-maes", OPTION_MASK_ISA_AES },
    { "-msha", OPTION_MASK_ISA_SHA },
    { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
    { "-msse2", OPTION_MASK_ISA_SSE2 },
    { "-msse", OPTION_MASK_ISA_SSE },
    { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
    { "-m3dnow", OPTION_MASK_ISA_3DNOW },
    { "-mmmx", OPTION_MASK_ISA_MMX },
    { "-mrtm", OPTION_MASK_ISA_RTM },
    { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
    { "-mrdseed", OPTION_MASK_ISA_RDSEED },
    { "-madx", OPTION_MASK_ISA_ADX },
    { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
    { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
    { "-mxsaves", OPTION_MASK_ISA_XSAVES },
    { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
    { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
    { "-mxsave", OPTION_MASK_ISA_XSAVE },
    { "-mabm", OPTION_MASK_ISA_ABM },
    { "-mbmi", OPTION_MASK_ISA_BMI },
    { "-mbmi2", OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
    { "-mtbm", OPTION_MASK_ISA_TBM },
    { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
    { "-msahf", OPTION_MASK_ISA_SAHF },
    { "-mmovbe", OPTION_MASK_ISA_MOVBE },
    { "-mcrc32", OPTION_MASK_ISA_CRC32 },
    { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd", OPTION_MASK_ISA_RDRND },
    { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
    { "-mclzero", OPTION_MASK_ISA_CLZERO },
    { "-mpku", OPTION_MASK_ISA_PKU },
    { "-mlwp", OPTION_MASK_ISA_LWP },
    { "-mhle", OPTION_MASK_ISA_HLE },
    { "-mfxsr", OPTION_MASK_ISA_FXSR },
    { "-mclwb", OPTION_MASK_ISA_CLWB }
  };

  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
    { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
    { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
    { "-m80387", MASK_80387 },
    { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double", MASK_ALIGN_DOUBLE },
    { "-mcld", MASK_CLD },
    { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
    { "-mieee-fp", MASK_IEEE_FP },
    { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args", MASK_NO_PUSH_ARGS },
    { "-mno-red-zone", MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip", MASK_RECIP },
    { "-mrtd", MASK_RTD },
    { "-msseregparm", MASK_SSEREGPARM },
    { "-mstack-arg-probe", MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
    { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
    { "-mvzeroupper", MASK_VZEROUPPER },
    { "-mstv", MASK_STV },
    { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
  };

  /* Additional flag options.  */
  static struct ix86_target_opts flag2_opts[] =
  {
    { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY }
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
		   + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];

  char isa_other[40];
  char isa2_other[40];
  char flags_other[40];
  char flags2_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
  const char *abi;

  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Add -m32/-m64/-mx32.  */
  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
    {
      if ((isa & OPTION_MASK_ABI_64) != 0)
	abi = "-m64";
      else
	abi = "-mx32";
      isa &= ~ (OPTION_MASK_ISA_64BIT
		| OPTION_MASK_ABI_64
		| OPTION_MASK_ABI_X32);
    }
  else
    abi = "-m32";
  opts[num++][0] = abi;

  /* Pick out the options in isa2 options.  */
  for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
    {
      if ((isa2 & isa2_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa2_opts[i].option;
	  isa2 &= ~ isa2_opts[i].mask;
	}
    }

  if (isa2 && add_nl_p)
    {
      opts[num++][0] = isa2_other;
      sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
    }

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = flags_other;
      sprintf (flags_other, "(other flags: %#x)", flags);
    }

  /* Add additional flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
    {
      if ((flags2 & flag2_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag2_opts[i].option;
	  flags2 &= ~ flag2_opts[i].mask;
	}
    }

  if (flags2 && add_nl_p)
    {
      opts[num++][0] = flags2_other;
      sprintf (flags2_other, "(other flags2: %#x)", flags2);
    }

  /* Add -mfpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Any options?  */
  if (num == 0)
    return NULL;

  gcc_assert (num < ARRAY_SIZE (opts));

  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\\';
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return true if profiling code should be emitted before the
   prologue; otherwise return false.
   Note: for x86 with "hotfix", a sorry () is issued.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
void ATTRIBUTE_UNUSED
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
				   target_flags, ix86_target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);

  return;
}
/* Return true if T is one of the bytes we should avoid with
   -mmitigate-rop.  */
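/* 0xc3 and 0xc2 encode the near RET and RET imm16 instructions, 0xcb
   and 0xca their far counterparts; avoiding these byte values keeps
   unintended return opcodes, which ROP gadgets target, out of the
   generated code.  */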
static bool
ix86_rop_should_change_byte_p (int t)
{
  return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
}
static const char *stringop_alg_names[] = {
#define DEF_ENUM
#define DEF_ALG(alg, name) #name,
#include "stringop.def"
#undef DEF_ENUM
#undef DEF_ALG
};
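/* Both this table and the stringop_alg enum expand stringop.def, so
   the strings stay in the same order as the enum values used to index
   them (see the (stringop_alg) casts in the parser below).  */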
/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
   The string is of the following form (or comma separated list of it):

     strategy_alg:max_size:[align|noalign]

   where the full size range for the strategy is either [0, max_size] or
   [min_size, max_size], in which min_size is the max_size + 1 of the
   preceding range.  The last size range must have max_size == -1.

   Examples:

    1.
       -mmemcpy-strategy=libcall:-1:noalign

      this is equivalent to (for known size memcpy) -mstringop-strategy=libcall

    2.
       -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign

      This is to tell the compiler to use the following strategy for memset
      1) when the expected size is between [1, 16], use rep_8byte strategy;
      2) when the size is between [17, 2048], use vector_loop;
      3) when the size is > 2048, use libcall.  */
struct stringop_size_range
{
  int max;
  stringop_alg alg;
  bool noalign;
};

static void
ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
{
  const struct stringop_algs *default_algs;
  stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
  char *curr_range_str, *next_range_str;
  const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=";
  int i = 0, n = 0;

  if (is_memset)
    default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
  else
    default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];

  curr_range_str = strategy_str;

  do
    {
      int maxs;
      char alg_name[128];
      char align[16];
      next_range_str = strchr (curr_range_str, ',');
      if (next_range_str)
	*next_range_str++ = '\0';

      if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs,
		  align) != 3)
	{
	  error ("wrong argument %qs to option %qs", curr_range_str, opt);
	  return;
	}

      if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
	{
	  error ("size ranges of option %qs should be increasing", opt);
	  return;
	}

      for (i = 0; i < last_alg; i++)
	if (!strcmp (alg_name, stringop_alg_names[i]))
	  break;

      if (i == last_alg)
	{
	  error ("wrong strategy name %qs specified for option %qs",
		 alg_name, opt);

	  auto_vec <const char *> candidates;
	  for (i = 0; i < last_alg; i++)
	    if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
	      candidates.safe_push (stringop_alg_names[i]);

	  char *s;
	  const char *hint
	    = candidates_list_and_hint (alg_name, s, candidates);
	  if (hint)
	    inform (input_location,
		    "valid arguments to %qs are: %s; did you mean %qs?",
		    opt, s, hint);
	  else
	    inform (input_location, "valid arguments to %qs are: %s",
		    opt, s);
	  XDELETEVEC (s);
	  return;
	}

      if ((stringop_alg) i == rep_prefix_8_byte
	  && !TARGET_64BIT)
	{
	  /* rep; movq isn't available in 32-bit code.  */
	  error ("strategy name %qs specified for option %qs "
		 "not supported for 32-bit code", alg_name, opt);
	  return;
	}

      input_ranges[n].max = maxs;
      input_ranges[n].alg = (stringop_alg) i;
      if (!strcmp (align, "align"))
	input_ranges[n].noalign = false;
      else if (!strcmp (align, "noalign"))
	input_ranges[n].noalign = true;
      else
	{
	  error ("unknown alignment %qs specified for option %qs", align, opt);
	  return;
	}
      n++;
      curr_range_str = next_range_str;
    }
  while (curr_range_str);

  if (input_ranges[n - 1].max != -1)
    {
      error ("the max value for the last size range should be -1"
	     " for option %qs", opt);
      return;
    }

  if (n > MAX_STRINGOP_ALGS)
    {
      error ("too many size ranges specified in option %qs", opt);
      return;
    }

  /* Now override the default algs array.  */
  for (i = 0; i < n; i++)
    {
      *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
      *const_cast<stringop_alg *>(&default_algs->size[i].alg)
	= input_ranges[i].alg;
      *const_cast<int *>(&default_algs->size[i].noalign)
	= input_ranges[i].noalign;
    }
}
/* Parse the -mtune-ctrl= option.  When DUMP is true,
   print the features that are explicitly set.  */
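/* The string is a comma-separated list of feature names; a leading '^'
   clears a feature instead of setting it.  For example (using
   hypothetical feature names), -mtune-ctrl=use_incdec,^use_sahf would
   enable the first tuning knob and disable the second.  */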
static void
parse_mtune_ctrl_str (bool dump)
{
  if (!ix86_tune_ctrl_string)
    return;

  char *next_feature_string = NULL;
  char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
  char *orig = curr_feature_string;
  int i;
  do
    {
      bool clear = false;

      next_feature_string = strchr (curr_feature_string, ',');
      if (next_feature_string)
	*next_feature_string++ = '\0';
      if (*curr_feature_string == '^')
	{
	  curr_feature_string++;
	  clear = true;
	}
      for (i = 0; i < X86_TUNE_LAST; i++)
	{
	  if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
	    {
	      ix86_tune_features[i] = !clear;
	      if (dump)
		fprintf (stderr, "Explicitly %s feature %s\n",
			 clear ? "clear" : "set", ix86_tune_feature_names[i]);
	      break;
	    }
	}
      if (i == X86_TUNE_LAST)
	error ("unknown parameter to option -mtune-ctrl: %s",
	       clear ? curr_feature_string - 1 : curr_feature_string);
      curr_feature_string = next_feature_string;
    }
  while (curr_feature_string);
  free (orig);
}
/* Helper function to set ix86_tune_features.  IX86_TUNE is the
   processor type.  */

static void
set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    {
      if (ix86_tune_no_default)
	ix86_tune_features[i] = 0;
      else
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }

  if (dump)
    {
      fprintf (stderr, "List of x86 specific tuning parameter names:\n");
      for (i = 0; i < X86_TUNE_LAST; i++)
	fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
		 ix86_tune_features[i] ? "on" : "off");
    }

  parse_mtune_ctrl_str (dump);
}
/* Default align_* from the processor table.  */

static void
ix86_default_align (struct gcc_options *opts)
{
  if (opts->x_align_loops == 0)
    {
      opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (opts->x_align_jumps == 0)
    {
      opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (opts->x_align_functions == 0)
    opts->x_align_functions = processor_target_table[ix86_tune].align_func;
}

/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */

static void
ix86_override_options_after_change (void)
{
  ix86_default_align (&global_options);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  Return true if there's an error related to march
   option.  */

static bool
ix86_option_override_internal (bool main_args_p,
			       struct gcc_options *opts,
			       struct gcc_options *opts_set)
{
  int i;
  unsigned int ix86_arch_mask;
  const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
#define PTA_HLE			(HOST_WIDE_INT_1 << 33)
#define PTA_PRFCHW		(HOST_WIDE_INT_1 << 34)
#define PTA_RDSEED		(HOST_WIDE_INT_1 << 35)
#define PTA_ADX			(HOST_WIDE_INT_1 << 36)
#define PTA_FXSR		(HOST_WIDE_INT_1 << 37)
#define PTA_XSAVE		(HOST_WIDE_INT_1 << 38)
#define PTA_XSAVEOPT		(HOST_WIDE_INT_1 << 39)
#define PTA_AVX512F		(HOST_WIDE_INT_1 << 40)
#define PTA_AVX512ER		(HOST_WIDE_INT_1 << 41)
#define PTA_AVX512PF		(HOST_WIDE_INT_1 << 42)
#define PTA_AVX512CD		(HOST_WIDE_INT_1 << 43)
#define PTA_MPX			(HOST_WIDE_INT_1 << 44)
#define PTA_SHA			(HOST_WIDE_INT_1 << 45)
#define PTA_PREFETCHWT1		(HOST_WIDE_INT_1 << 46)
#define PTA_CLFLUSHOPT		(HOST_WIDE_INT_1 << 47)
#define PTA_XSAVEC		(HOST_WIDE_INT_1 << 48)
#define PTA_XSAVES		(HOST_WIDE_INT_1 << 49)
#define PTA_AVX512DQ		(HOST_WIDE_INT_1 << 50)
#define PTA_AVX512BW		(HOST_WIDE_INT_1 << 51)
#define PTA_AVX512VL		(HOST_WIDE_INT_1 << 52)
#define PTA_AVX512IFMA		(HOST_WIDE_INT_1 << 53)
#define PTA_AVX512VBMI		(HOST_WIDE_INT_1 << 54)
#define PTA_CLWB		(HOST_WIDE_INT_1 << 55)
#define PTA_MWAITX		(HOST_WIDE_INT_1 << 56)
#define PTA_CLZERO		(HOST_WIDE_INT_1 << 57)
#define PTA_NO_80387		(HOST_WIDE_INT_1 << 58)
#define PTA_PKU			(HOST_WIDE_INT_1 << 59)
#define PTA_AVX5124VNNIW	(HOST_WIDE_INT_1 << 60)
#define PTA_AVX5124FMAPS	(HOST_WIDE_INT_1 << 61)
#define PTA_AVX512VPOPCNTDQ	(HOST_WIDE_INT_1 << 62)
#define PTA_SGX			(HOST_WIDE_INT_1 << 63)

#define PTA_CORE2 \
  (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
   | PTA_CX16 | PTA_FXSR)
#define PTA_NEHALEM \
  (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
#define PTA_WESTMERE \
  (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
#define PTA_SANDYBRIDGE \
  (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
#define PTA_IVYBRIDGE \
  (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
#define PTA_HASWELL \
  (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
   | PTA_FMA | PTA_MOVBE | PTA_HLE)
#define PTA_BROADWELL \
  (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
#define PTA_SKYLAKE \
  (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
#define PTA_SKYLAKE_AVX512 \
  (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
   | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU | PTA_CLWB)
#define PTA_CANNONLAKE \
  (PTA_SKYLAKE_AVX512 | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA)
#define PTA_KNL \
  (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
#define PTA_BONNELL \
  (PTA_CORE2 | PTA_MOVBE)
#define PTA_SILVERMONT \
  (PTA_WESTMERE | PTA_MOVBE)
#define PTA_KNM \
  (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)

/* if this reaches 64, need to widen struct pta flags below */
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, CPU_NONE, 0},
      {"i486", PROCESSOR_I486, CPU_NONE, 0},
      {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
      {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
      {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_FXSR},
      {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_FXSR},
      {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
      {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_FXSR},
      {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_FXSR},
      {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
       PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
       PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
       PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"prescott", PROCESSOR_NOCONA, CPU_NONE,
       PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"nocona", PROCESSOR_NOCONA, CPU_NONE,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
      {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
      {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
      {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
      {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
      {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
       PTA_SANDYBRIDGE},
      {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
       PTA_SANDYBRIDGE},
      {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
       PTA_IVYBRIDGE},
      {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
       PTA_IVYBRIDGE},
      {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
      {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
      {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
      {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
      {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL,
       PTA_SKYLAKE_AVX512},
      {"cannonlake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_CANNONLAKE},
      {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
      {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
      {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
      {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
      {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
      {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
      {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
      {"x86-64", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"eden-x2", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"nano", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_FXSR},
      {"nano-1000", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_FXSR},
      {"nano-2000", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_FXSR},
      {"nano-3000", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
      {"nano-x2", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
      {"eden-x4", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
      {"nano-x4", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
      {"k8", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"k8-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
      {"opteron", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
      {"athlon64", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
      {"athlon-fx", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
       | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
       | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
      {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
       | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
      {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
       | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
       | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
      {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
       | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
       | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
       | PTA_XSAVEOPT | PTA_FSGSBASE},
      {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
       | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
       | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
       | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
       | PTA_MOVBE | PTA_MWAITX},
      {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
       | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
       | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
       | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
       | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
       | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
      {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
       | PTA_FXSR | PTA_XSAVE},
      {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
       | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
       | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},

      {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
       PTA_64BIT
       | PTA_HLE /* flags are only used for -march switch.  */ },
    };
  /* -mrecip options.  */
  static struct
    {
      const char *string;		/* option name */
      unsigned int mask;		/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all", RECIP_MASK_ALL },
      { "none", RECIP_MASK_NONE },
      { "div", RECIP_MASK_DIV },
      { "sqrt", RECIP_MASK_SQRT },
      { "vec-div", RECIP_MASK_VEC_DIV },
      { "vec-sqrt", RECIP_MASK_VEC_SQRT },
    };
= ARRAY_SIZE (processor_alias_table
);
3678 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3679 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3680 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3681 opts
->x_ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3682 #ifdef TARGET_BI_ARCH
3685 #if TARGET_BI_ARCH == 1
3686 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3687 is on and OPTION_MASK_ABI_X32 is off. We turn off
3688 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3690 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3691 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3693 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3694 on and OPTION_MASK_ABI_64 is off. We turn off
3695 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3696 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3697 if (TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3698 || TARGET_16BIT_P (opts
->x_ix86_isa_flags
))
3699 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3701 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3702 && TARGET_IAMCU_P (opts
->x_target_flags
))
3703 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3704 TARGET_X32_P (opts
->x_ix86_isa_flags
) ? "x32" : "64-bit");
3708 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3710 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3711 OPTION_MASK_ABI_64 for TARGET_X32. */
3712 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3713 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3715 else if (TARGET_16BIT_P (opts
->x_ix86_isa_flags
))
3716 opts
->x_ix86_isa_flags
&= ~(OPTION_MASK_ISA_64BIT
3717 | OPTION_MASK_ABI_X32
3718 | OPTION_MASK_ABI_64
);
3719 else if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3721 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3722 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3723 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3724 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3727 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3728 SUBTARGET_OVERRIDE_OPTIONS
;
3731 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3732 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3735 /* -fPIC is the default for x86_64. */
3736 if (TARGET_MACHO
&& TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3737 opts
->x_flag_pic
= 2;
3739 /* Need to check -mtune=generic first. */
3740 if (opts
->x_ix86_tune_string
)
3742 /* As special support for cross compilers we read -mtune=native
3743 as -mtune=generic. With native compilers we won't see the
3744 -mtune=native, as it was changed by the driver. */
3745 if (!strcmp (opts
->x_ix86_tune_string
, "native"))
3747 opts
->x_ix86_tune_string
= "generic";
3749 else if (!strcmp (opts
->x_ix86_tune_string
, "x86-64"))
3750 warning (OPT_Wdeprecated
,
3752 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3753 "or %<-mtune=generic%> instead as appropriate")
3754 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3755 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3756 " instead as appropriate"));
3760 if (opts
->x_ix86_arch_string
)
3761 opts
->x_ix86_tune_string
= opts
->x_ix86_arch_string
;
3762 if (!opts
->x_ix86_tune_string
)
3764 opts
->x_ix86_tune_string
3765 = processor_target_table
[TARGET_CPU_DEFAULT
].name
;
3766 ix86_tune_defaulted
= 1;
3769 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3770 or defaulted. We need to use a sensible tune option. */
3771 if (!strcmp (opts
->x_ix86_tune_string
, "x86-64"))
3773 opts
->x_ix86_tune_string
= "generic";
3777 if (opts
->x_ix86_stringop_alg
== rep_prefix_8_byte
3778 && !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3780 /* rep; movq isn't available in 32-bit code. */
3781 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3782 opts
->x_ix86_stringop_alg
= no_stringop
;
3785 if (!opts
->x_ix86_arch_string
)
3786 opts
->x_ix86_arch_string
3787 = TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3788 ? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3790 ix86_arch_specified
= 1;
3792 if (opts_set
->x_ix86_pmode
)
3794 if ((TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3795 && opts
->x_ix86_pmode
== PMODE_SI
)
3796 || (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3797 && opts
->x_ix86_pmode
== PMODE_DI
))
3798 error ("address mode %qs not supported in the %s bit mode",
3799 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "short" : "long",
3800 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "64" : "32");
3803 opts
->x_ix86_pmode
= TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3804 ? PMODE_DI
: PMODE_SI
;
3806 if (!opts_set
->x_ix86_abi
)
3807 opts
->x_ix86_abi
= DEFAULT_ABI
;
3809 if (opts
->x_ix86_abi
== MS_ABI
&& TARGET_X32_P (opts
->x_ix86_isa_flags
))
3810 error ("-mabi=ms not supported with X32 ABI");
3811 gcc_assert (opts
->x_ix86_abi
== SYSV_ABI
|| opts
->x_ix86_abi
== MS_ABI
);
3813 /* For targets using ms ABI enable ms-extensions, if not
3814 explicit turned off. For non-ms ABI we turn off this
3816 if (!opts_set
->x_flag_ms_extensions
)
3817 opts
->x_flag_ms_extensions
= (MS_ABI
== DEFAULT_ABI
);
3819 if (opts_set
->x_ix86_cmodel
)
3821 switch (opts
->x_ix86_cmodel
)
3825 if (opts
->x_flag_pic
)
3826 opts
->x_ix86_cmodel
= CM_SMALL_PIC
;
3827 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3828 error ("code model %qs not supported in the %s bit mode",
3834 if (opts
->x_flag_pic
)
3835 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
;
3836 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3837 error ("code model %qs not supported in the %s bit mode",
3839 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3840 error ("code model %qs not supported in x32 mode",
3846 if (opts
->x_flag_pic
)
3847 opts
->x_ix86_cmodel
= CM_LARGE_PIC
;
3848 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3849 error ("code model %qs not supported in the %s bit mode",
3851 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3852 error ("code model %qs not supported in x32 mode",
3857 if (opts
->x_flag_pic
)
3858 error ("code model %s does not support PIC mode", "32");
3859 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3860 error ("code model %qs not supported in the %s bit mode",
3865 if (opts
->x_flag_pic
)
3867 error ("code model %s does not support PIC mode", "kernel");
3868 opts
->x_ix86_cmodel
= CM_32
;
3870 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3871 error ("code model %qs not supported in the %s bit mode",
3881 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3882 use of rip-relative addressing. This eliminates fixups that
3883 would otherwise be needed if this object is to be placed in a
3884 DLL, and is essentially just as efficient as direct addressing. */
3885 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3886 && (TARGET_RDOS
|| TARGET_PECOFF
))
3887 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
, opts
->x_flag_pic
= 1;
3888 else if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3889 opts
->x_ix86_cmodel
= opts
->x_flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3891 opts
->x_ix86_cmodel
= CM_32
;
3893 if (TARGET_MACHO
&& opts
->x_ix86_asm_dialect
== ASM_INTEL
)
3895 error ("-masm=intel not supported in this configuration");
3896 opts
->x_ix86_asm_dialect
= ASM_ATT
;
3898 if ((TARGET_64BIT_P (opts
->x_ix86_isa_flags
) != 0)
3899 != ((opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3900 sorry ("%i-bit mode not compiled in",
3901 (opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3903 for (i
= 0; i
< pta_size
; i
++)
3904 if (! strcmp (opts
->x_ix86_arch_string
, processor_alias_table
[i
].name
))
3906 if (!strcmp (opts
->x_ix86_arch_string
, "generic"))
3909 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3911 : G_("%<generic%> CPU can be used only for "
3912 "%<target(\"tune=\")%> attribute"));
3915 else if (!strcmp (opts
->x_ix86_arch_string
, "intel"))
3918 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3920 : G_("%<intel%> CPU can be used only for "
3921 "%<target(\"tune=\")%> attribute"));
3925 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3926 && !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3928 error ("CPU you selected does not support x86-64 "
3933 ix86_schedule
= processor_alias_table
[i
].schedule
;
3934 ix86_arch
= processor_alias_table
[i
].processor
;
3935 /* Default cpu tuning to the architecture. */
3936 ix86_tune
= ix86_arch
;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
	      && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_SHA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
	  ix86_isa_flags |= OPTION_MASK_ISA_SHA;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & PTA_PRFCHW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
	if (processor_alias_table[i].flags & PTA_RDSEED
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
	if (processor_alias_table[i].flags & PTA_ADX
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
	if (processor_alias_table[i].flags & PTA_FXSR
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
	if (processor_alias_table[i].flags & PTA_XSAVE
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
	if (processor_alias_table[i].flags & PTA_XSAVEOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
	if (processor_alias_table[i].flags & PTA_AVX512F
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
	if (processor_alias_table[i].flags & PTA_AVX512ER
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
	if (processor_alias_table[i].flags & PTA_AVX512PF
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
	if (processor_alias_table[i].flags & PTA_AVX512CD
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
	if (processor_alias_table[i].flags & PTA_PREFETCHWT1
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
	if (processor_alias_table[i].flags & PTA_CLWB
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
	if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
	if (processor_alias_table[i].flags & PTA_CLZERO
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
	if (processor_alias_table[i].flags & PTA_XSAVEC
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
	if (processor_alias_table[i].flags & PTA_XSAVES
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
	if (processor_alias_table[i].flags & PTA_AVX512DQ
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
	if (processor_alias_table[i].flags & PTA_AVX512BW
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
	if (processor_alias_table[i].flags & PTA_AVX512VL
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
	if (processor_alias_table[i].flags & PTA_MPX
	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MPX))
	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MPX;
	if (processor_alias_table[i].flags & PTA_AVX512VBMI
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
	if (processor_alias_table[i].flags & PTA_AVX512IFMA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
))
4103 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512IFMA
;
4105 if (processor_alias_table
[i
].flags
& PTA_AVX5124VNNIW
4106 && !(opts
->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_AVX5124VNNIW
))
4107 opts
->x_ix86_isa_flags2
|= OPTION_MASK_ISA_AVX5124VNNIW
;
4108 if (processor_alias_table
[i
].flags
& PTA_AVX5124FMAPS
4109 && !(opts
->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_AVX5124FMAPS
))
4110 opts
->x_ix86_isa_flags2
|= OPTION_MASK_ISA_AVX5124FMAPS
;
4111 if (processor_alias_table
[i
].flags
& PTA_AVX512VPOPCNTDQ
4112 && !(opts
->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_AVX512VPOPCNTDQ
))
4113 opts
->x_ix86_isa_flags2
|= OPTION_MASK_ISA_AVX512VPOPCNTDQ
;
4114 if (processor_alias_table
[i
].flags
& PTA_SGX
4115 && !(opts
->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_SGX
))
4116 opts
->x_ix86_isa_flags2
|= OPTION_MASK_ISA_SGX
;
4118 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
4119 x86_prefetch_sse
= true;
4120 if (processor_alias_table
[i
].flags
& PTA_MWAITX
4121 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MWAITX
))
4122 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MWAITX
;
4123 if (processor_alias_table
[i
].flags
& PTA_PKU
4124 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PKU
))
4125 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PKU
;
4127 /* Don't enable x87 instructions if only
4128 general registers are allowed. */
4129 if (!(opts_set
->x_ix86_target_flags
& OPTION_MASK_GENERAL_REGS_ONLY
)
4130 && !(opts_set
->x_target_flags
& MASK_80387
))
4132 if (processor_alias_table
[i
].flags
& PTA_NO_80387
)
4133 opts
->x_target_flags
&= ~MASK_80387
;
4135 opts
->x_target_flags
|= MASK_80387
;
4140 if (TARGET_X32
&& (opts
->x_ix86_isa_flags2
& OPTION_MASK_ISA_MPX
))
4141 error ("Intel MPX does not support x32");
4143 if (TARGET_X32
&& (ix86_isa_flags2
& OPTION_MASK_ISA_MPX
))
4144 error ("Intel MPX does not support x32");
4149 ? G_("bad value (%qs) for %<-march=%> switch")
4150 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
4151 opts
->x_ix86_arch_string
);
4153 auto_vec
<const char *> candidates
;
4154 for (i
= 0; i
< pta_size
; i
++)
4155 if (strcmp (processor_alias_table
[i
].name
, "generic")
4156 && strcmp (processor_alias_table
[i
].name
, "intel")
4157 && (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
4158 || (processor_alias_table
[i
].flags
& PTA_64BIT
)))
4159 candidates
.safe_push (processor_alias_table
[i
].name
);
4163 = candidates_list_and_hint (opts
->x_ix86_arch_string
, s
, candidates
);
4165 inform (input_location
,
4167 ? G_("valid arguments to %<-march=%> switch are: "
4168 "%s; did you mean %qs?")
4169 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
4170 "%s; did you mean %qs?"), s
, hint
);
4172 inform (input_location
,
4174 ? G_("valid arguments to %<-march=%> switch are: %s")
4175 : G_("valid arguments to %<target(\"arch=\")%> attribute "
4180 ix86_arch_mask
= 1u << ix86_arch
;
4181 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4182 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4184 for (i
= 0; i
< pta_size
; i
++)
4185 if (! strcmp (opts
->x_ix86_tune_string
, processor_alias_table
[i
].name
))
4187 ix86_schedule
= processor_alias_table
[i
].schedule
;
4188 ix86_tune
= processor_alias_table
[i
].processor
;
4189 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4191 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
4193 if (ix86_tune_defaulted
)
4195 opts
->x_ix86_tune_string
= "x86-64";
4196 for (i
= 0; i
< pta_size
; i
++)
4197 if (! strcmp (opts
->x_ix86_tune_string
,
4198 processor_alias_table
[i
].name
))
4200 ix86_schedule
= processor_alias_table
[i
].schedule
;
4201 ix86_tune
= processor_alias_table
[i
].processor
;
4204 error ("CPU you selected does not support x86-64 "
4208 /* Intel CPUs have always interpreted SSE prefetch instructions as
4209 NOPs; so, we can enable SSE prefetch instructions even when
4210 -mtune (rather than -march) points us to a processor that has them.
4211 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4212 higher processors. */
4214 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
4215 x86_prefetch_sse
= true;
4219 if (ix86_tune_specified
&& i
== pta_size
)
4222 ? G_("bad value (%qs) for %<-mtune=%> switch")
4223 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4224 opts
->x_ix86_tune_string
);
4226 auto_vec
<const char *> candidates
;
4227 for (i
= 0; i
< pta_size
; i
++)
4228 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
4229 || (processor_alias_table
[i
].flags
& PTA_64BIT
))
4230 candidates
.safe_push (processor_alias_table
[i
].name
);
4234 = candidates_list_and_hint (opts
->x_ix86_tune_string
, s
, candidates
);
4236 inform (input_location
,
4238 ? G_("valid arguments to %<-mtune=%> switch are: "
4239 "%s; did you mean %qs?")
4240 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4241 "%s; did you mean %qs?"), s
, hint
);
4243 inform (input_location
,
4245 ? G_("valid arguments to %<-mtune=%> switch are: %s")
4246 : G_("valid arguments to %<target(\"tune=\")%> attribute "
4251 set_ix86_tune_features (ix86_tune
, opts
->x_ix86_dump_tunes
);
4253 #ifndef USE_IX86_FRAME_POINTER
4254 #define USE_IX86_FRAME_POINTER 0
4257 #ifndef USE_X86_64_FRAME_POINTER
4258 #define USE_X86_64_FRAME_POINTER 0
4261 /* Set the default values for switches whose default depends on TARGET_64BIT
4262 in case they weren't overwritten by command line options. */
4263 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4265 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
4266 opts
->x_flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
4267 if (opts
->x_flag_asynchronous_unwind_tables
4268 && !opts_set
->x_flag_unwind_tables
4269 && TARGET_64BIT_MS_ABI
)
4270 opts
->x_flag_unwind_tables
= 1;
4271 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
4272 opts
->x_flag_unwind_tables
4273 = opts
->x_flag_asynchronous_unwind_tables
= 1;
4274 if (opts
->x_flag_pcc_struct_return
== 2)
4275 opts
->x_flag_pcc_struct_return
= 0;
4279 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
4280 opts
->x_flag_omit_frame_pointer
4281 = !(USE_IX86_FRAME_POINTER
|| opts
->x_optimize_size
);
4282 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
4283 opts
->x_flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
4284 if (opts
->x_flag_pcc_struct_return
== 2)
4286 /* Intel MCU psABI specifies that -freg-struct-return should
4287 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4288 we check -miamcu so that -freg-struct-return is always
4289 turned on if -miamcu is used. */
4290 if (TARGET_IAMCU_P (opts
->x_target_flags
))
4291 opts
->x_flag_pcc_struct_return
= 0;
4293 opts
->x_flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
4297 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
4298 /* TODO: ix86_cost should be chosen at instruction or function granuality
4299 so for cold code we use size_cost even in !optimize_size compilation. */
4300 if (opts
->x_optimize_size
)
4301 ix86_cost
= &ix86_size_cost
;
4303 ix86_cost
= ix86_tune_cost
;
4305 /* Arrange to set up i386_stack_locals for all functions. */
4306 init_machine_status
= ix86_init_machine_status
;
4308 /* Validate -mregparm= value. */
4309 if (opts_set
->x_ix86_regparm
)
4311 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4312 warning (0, "-mregparm is ignored in 64-bit mode");
4313 else if (TARGET_IAMCU_P (opts
->x_target_flags
))
4314 warning (0, "-mregparm is ignored for Intel MCU psABI");
4315 if (opts
->x_ix86_regparm
> REGPARM_MAX
)
4317 error ("-mregparm=%d is not between 0 and %d",
4318 opts
->x_ix86_regparm
, REGPARM_MAX
);
4319 opts
->x_ix86_regparm
= 0;
4322 if (TARGET_IAMCU_P (opts
->x_target_flags
)
4323 || TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4324 opts
->x_ix86_regparm
= REGPARM_MAX
;
4326 /* Default align_* from the processor table. */
4327 ix86_default_align (opts
);
4329 /* Provide default for -mbranch-cost= value. */
4330 if (!opts_set
->x_ix86_branch_cost
)
4331 opts
->x_ix86_branch_cost
= ix86_tune_cost
->branch_cost
;
4333 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4335 opts
->x_target_flags
4336 |= TARGET_SUBTARGET64_DEFAULT
& ~opts_set
->x_target_flags
;
4338 /* Enable by default the SSE and MMX builtins. Do allow the user to
4339 explicitly disable any of these. In particular, disabling SSE and
4340 MMX for kernel code is extremely useful. */
4341 if (!ix86_arch_specified
)
4342 opts
->x_ix86_isa_flags
4343 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
4344 | TARGET_SUBTARGET64_ISA_DEFAULT
)
4345 & ~opts
->x_ix86_isa_flags_explicit
);
4347 if (TARGET_RTD_P (opts
->x_target_flags
))
4350 ? G_("%<-mrtd%> is ignored in 64bit mode")
4351 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
4355 opts
->x_target_flags
4356 |= TARGET_SUBTARGET32_DEFAULT
& ~opts_set
->x_target_flags
;
4358 if (!ix86_arch_specified
)
4359 opts
->x_ix86_isa_flags
4360 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~opts
->x_ix86_isa_flags_explicit
;
4362 /* i386 ABI does not specify red zone. It still makes sense to use it
4363 when programmer takes care to stack from being destroyed. */
4364 if (!(opts_set
->x_target_flags
& MASK_NO_RED_ZONE
))
4365 opts
->x_target_flags
|= MASK_NO_RED_ZONE
;
4368 /* Keep nonleaf frame pointers. */
4369 if (opts
->x_flag_omit_frame_pointer
)
4370 opts
->x_target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
4371 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts
->x_target_flags
))
4372 opts
->x_flag_omit_frame_pointer
= 1;
4374 /* If we're doing fast math, we don't care about comparison order
4375 wrt NaNs. This lets us use a shorter comparison sequence. */
4376 if (opts
->x_flag_finite_math_only
)
4377 opts
->x_target_flags
&= ~MASK_IEEE_FP
;
4379 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4380 since the insns won't need emulation. */
4381 if (ix86_tune_features
[X86_TUNE_ALWAYS_FANCY_MATH_387
])
4382 opts
->x_target_flags
&= ~MASK_NO_FANCY_MATH_387
;
4384 /* Likewise, if the target doesn't have a 387, or we've specified
4385 software floating point, don't use 387 inline intrinsics. */
4386 if (!TARGET_80387_P (opts
->x_target_flags
))
4387 opts
->x_target_flags
|= MASK_NO_FANCY_MATH_387
;
4389 /* Turn on MMX builtins for -msse. */
4390 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
))
4391 opts
->x_ix86_isa_flags
4392 |= OPTION_MASK_ISA_MMX
& ~opts
->x_ix86_isa_flags_explicit
;
4394 /* Enable SSE prefetch. */
4395 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
)
4396 || (TARGET_PRFCHW_P (opts
->x_ix86_isa_flags
)
4397 && !TARGET_3DNOW_P (opts
->x_ix86_isa_flags
))
4398 || TARGET_PREFETCHWT1_P (opts
->x_ix86_isa_flags
))
4399 x86_prefetch_sse
= true;
4401 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4402 if (TARGET_SSE4_2_P (opts
->x_ix86_isa_flags
)
4403 || TARGET_ABM_P (opts
->x_ix86_isa_flags
))
4404 opts
->x_ix86_isa_flags
4405 |= OPTION_MASK_ISA_POPCNT
& ~opts
->x_ix86_isa_flags_explicit
;
4407 /* Enable lzcnt instruction for -mabm. */
4408 if (TARGET_ABM_P(opts
->x_ix86_isa_flags
))
4409 opts
->x_ix86_isa_flags
4410 |= OPTION_MASK_ISA_LZCNT
& ~opts
->x_ix86_isa_flags_explicit
;
4412 /* Disable BMI, BMI2 and TBM instructions for -m16. */
4413 if (TARGET_16BIT_P(opts
->x_ix86_isa_flags
))
4414 opts
->x_ix86_isa_flags
4415 &= ~((OPTION_MASK_ISA_BMI
| OPTION_MASK_ISA_BMI2
| OPTION_MASK_ISA_TBM
)
4416 & ~opts
->x_ix86_isa_flags_explicit
);
4418 /* Validate -mpreferred-stack-boundary= value or default it to
4419 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4420 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
4421 if (opts_set
->x_ix86_preferred_stack_boundary_arg
)
4423 int min
= TARGET_64BIT_P (opts
->x_ix86_isa_flags
)? 3 : 2;
4424 int max
= TARGET_SEH
? 4 : 12;
4426 if (opts
->x_ix86_preferred_stack_boundary_arg
< min
4427 || opts
->x_ix86_preferred_stack_boundary_arg
> max
)
4430 error ("-mpreferred-stack-boundary is not supported "
4433 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4434 opts
->x_ix86_preferred_stack_boundary_arg
, min
, max
);
4437 ix86_preferred_stack_boundary
4438 = (1 << opts
->x_ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
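  /* Illustrative note (not from the original sources): the argument is a
     log2 byte count, so e.g. -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 bytes = 128 bits on the usual
     8-bit-unit targets, which matches the 16-byte stack alignment the
     x86-64 psABI expects at function entry.  */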
  /* Set the default value for -mstackrealign.  */
  if (!opts_set->x_ix86_force_align_arg_pointer)
    opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (opts_set->x_ix86_incoming_stack_boundary_arg)
    {
      int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;

      if (opts->x_ix86_incoming_stack_boundary_arg < min
	  || opts->x_ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       opts->x_ix86_incoming_stack_boundary_arg, min);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }

#ifndef NO_PROFILE_COUNTERS
  if (flag_nop_mcount)
    error ("-mnop-mcount is not compatible with this target");
#endif
  if (flag_nop_mcount && flag_pic)
    error ("-mnop-mcount is not implemented for -fPIC");

  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM_P (opts->x_target_flags)
      && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
    error (main_args_p
	   ? G_("%<-msseregparm%> used without SSE enabled")
	   : G_("%<target(\"sseregparm\")%> used without SSE enabled"));

  if (opts_set->x_ix86_fpmath)
    {
      if (opts->x_ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
	    {
	      if (TARGET_80387_P (opts->x_target_flags))
		{
		  warning (0, "SSE instruction set disabled, using 387 arithmetics");
		  opts->x_ix86_fpmath = FPMATH_387;
		}
	    }
	  else if ((opts->x_ix86_fpmath & FPMATH_387)
		   && !TARGET_80387_P (opts->x_target_flags))
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      opts->x_ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  /* For all chips supporting SSE2, -mfpmath=sse performs better than
     fpmath=387.  The second is however default at many targets since the
     extra 80bit precision of temporaries is considered to be part of ABI.
     Overwrite the default at least for -ffast-math.
     TODO: -mfpmath=both seems to produce same performing code with bit
     smaller binaries.  It is however not clear if register allocation is
     ready for this setting.
     Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
     codegen.  We may switch to 387 with -ffast-math for size optimized
     functions.  */
  else if (fast_math_flags_set_p (&global_options)
	   && TARGET_SSE2_P (opts->x_ix86_isa_flags))
    opts->x_ix86_fpmath = FPMATH_SSE;
  else
    opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);

  /* Use external vectorized library in vectorizing intrinsics.  */
  if (opts_set->x_ix86_veclibabi_type)
    switch (opts->x_ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }

  if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
      && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE_P (opts->x_target_flags)
      && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0,
		 main_args_p
		 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
		      "for correctness")
		 : G_("stack probing requires "
		      "%<target(\"accumulate-outgoing-args\")%> for "
		      "correctness"));
      opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
     so enable -maccumulate-outgoing-args when %ebp is fixed.  */
  if (fixed_regs[BP_REG]
      && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0,
		 main_args_p
		 ? G_("fixed ebp register requires "
		      "%<-maccumulate-outgoing-args%>")
		 : G_("fixed ebp register requires "
		      "%<target(\"accumulate-outgoing-args\")%>"));
      opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_tune_cost->simultaneous_prefetches,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ix86_tune_cost->prefetch_block,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ix86_tune_cost->l1_cache_size,
			 opts->x_param_values,
			 opts_set->x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 ix86_tune_cost->l2_cache_size,
			 opts->x_param_values,
			 opts_set->x_param_values);

  /* Enable sw prefetching at -O3 for CPUs where prefetching is helpful.  */
  if (opts->x_flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
      && !opts->x_optimize_size
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    opts->x_flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
    targetm.expand_builtin_va_start = NULL;

  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    ix86_gen_leave = gen_leave;

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
      ix86_gen_monitor = gen_sse3_monitor_di;
      ix86_gen_monitorx = gen_monitorx_di;
      ix86_gen_clzero = gen_clzero_di;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
      ix86_gen_monitor = gen_sse3_monitor_si;
      ix86_gen_monitorx = gen_monitorx_si;
      ix86_gen_clzero = gen_clzero_si;
    }

#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
#endif

  /* Set the default value for -mfentry.  */
  if (!opts_set->x_flag_fentry)
    opts->x_flag_fentry = TARGET_SEH;
  else
    {
      if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
	  && opts->x_flag_fentry)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      else if (TARGET_SEH && !opts->x_flag_fentry)
	sorry ("-mno-fentry isn%'t compatible with SEH");
    }

  if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
    sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");

  if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
      && TARGET_EMIT_VZEROUPPER)
    opts->x_target_flags |= MASK_VZEROUPPER;
  if (!(opts_set->x_target_flags & MASK_STV))
    opts->x_target_flags |= MASK_STV;
  /* Disable STV if -mpreferred-stack-boundary={2,3} or
     -mincoming-stack-boundary={2,3} or -mstackrealign - the needed
     stack realignment will be extra cost the pass doesn't take into
     account and the pass can't realign the stack.  */
  if (ix86_preferred_stack_boundary < 128
      || ix86_incoming_stack_boundary < 128
      || opts->x_ix86_force_align_arg_pointer)
    opts->x_target_flags &= ~MASK_STV;
  if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
      && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
    opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
  if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
      && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
    opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;

  /* Enable 128-bit AVX instruction generation
     for the auto-vectorizer.  */
  if (TARGET_AVX128_OPTIMAL
      && (opts_set->x_prefer_vector_width_type == PVW_NONE))
    opts->x_prefer_vector_width_type = PVW_AVX128;

  /* Use 256-bit AVX instruction generation
     in the auto-vectorizer.  */
  if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL]
      && (opts_set->x_prefer_vector_width_type == PVW_NONE))
    opts->x_prefer_vector_width_type = PVW_AVX256;

  if (opts->x_ix86_recip_name)
    {
      char *p = ASTRDUP (opts->x_ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  opts->x_recip_mask_explicit |= mask;
	  if (invert)
	    opts->x_recip_mask &= ~mask;
	  else
	    opts->x_recip_mask |= mask;
	}
    }
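  /* Illustrative note (not from the original sources): with the parser
     above, a command line such as

	 gcc -O2 -ffast-math -mrecip=all,!sqrt ...

     first sets the RECIP_MASK_* bits for the "all" entry of
     recip_options, then the "!"-prefixed "sqrt" entry clears its bit
     again, so reciprocal approximations are used everywhere except for
     scalar square roots.  The option names come from the GCC manual's
     -mrecip= documentation; the table itself lives elsewhere in this
     file.  */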
  if (TARGET_RECIP_P (opts->x_target_flags))
    opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
  else if (opts_set->x_target_flags & MASK_RECIP)
    opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);

  /* Default long double to 64-bit for 32-bit Bionic and to __float128
     for 64-bit Bionic.  Also default long double to 64-bit for Intel
     MCU psABI.  */
  if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
      && !(opts_set->x_target_flags
	   & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
    opts->x_target_flags |= (TARGET_64BIT
			     ? MASK_LONG_DOUBLE_128
			     : MASK_LONG_DOUBLE_64);

  /* Only one of them can be active.  */
  gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
	      || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);

  /* Handle stack protector */
  if (!opts_set->x_ix86_stack_protector_guard)
    opts->x_ix86_stack_protector_guard
      = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;

#ifdef TARGET_THREAD_SSP_OFFSET
  ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
#endif

  if (global_options_set.x_ix86_stack_protector_guard_offset_str)
    {
      char *endp;
      const char *str = ix86_stack_protector_guard_offset_str;

      errno = 0;
      int64_t offset;

#if defined(INT64_T_IS_LONG)
      offset = strtol (str, &endp, 0);
#else
      offset = strtoll (str, &endp, 0);
#endif

      if (!*str || *endp || errno)
	error ("%qs is not a valid number "
	       "in -mstack-protector-guard-offset=", str);

      if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
		     HOST_WIDE_INT_C (0x7fffffff)))
	error ("%qs is not a valid offset "
	       "in -mstack-protector-guard-offset=", str);

      ix86_stack_protector_guard_offset = offset;
    }
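  /* Illustrative usage sketch (not part of GCC itself): freestanding code
     such as an OS kernel typically combines this knob with the register
     selection handled just below, e.g. something like

	 -mstack-protector-guard-reg=gs -mstack-protector-guard-offset=40

     so the canary is loaded from %gs:40 instead of the C library's TLS
     slot.  The concrete register and offset are the embedder's choice;
     the values shown here are only an example.  */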
  ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;

  /* The kernel uses a different segment register for performance
     reasons; a system call would not have to trash the userspace
     segment register, which would be expensive.  */
  if (ix86_cmodel == CM_KERNEL)
    ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;

  if (global_options_set.x_ix86_stack_protector_guard_reg_str)
    {
      const char *str = ix86_stack_protector_guard_reg_str;
      addr_space_t seg = ADDR_SPACE_GENERIC;

      /* Discard optional register prefix.  */
      if (str[0] == '%')
	str++;

      if (strlen (str) == 2 && str[1] == 's')
	{
	  if (str[0] == 'f')
	    seg = ADDR_SPACE_SEG_FS;
	  else if (str[0] == 'g')
	    seg = ADDR_SPACE_SEG_GS;
	}

      if (seg == ADDR_SPACE_GENERIC)
	error ("%qs is not a valid base register "
	       "in -mstack-protector-guard-reg=",
	       ix86_stack_protector_guard_reg_str);

      ix86_stack_protector_guard_reg = seg;
    }

  /* Handle -mmemcpy-strategy= and -mmemset-strategy=  */
  if (opts->x_ix86_tune_memcpy_strategy)
    {
      char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
      ix86_parse_stringop_strategy_string (str, false);
      free (str);
    }

  if (opts->x_ix86_tune_memset_strategy)
    {
      char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
      ix86_parse_stringop_strategy_string (str, true);
      free (str);
    }

  /* Save the initial options in case the user does function specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node (opts);

  /* Do not support control flow instrumentation if CET is not enabled.  */
  if (opts->x_flag_cf_protection != CF_NONE)
    {
      if (!(TARGET_IBT_P (opts->x_ix86_isa_flags2)
	    || TARGET_SHSTK_P (opts->x_ix86_isa_flags2)))
	{
	  if (flag_cf_protection == CF_FULL)
	    {
	      error ("%<-fcf-protection=full%> requires CET support "
		     "on this target. Use -mcet or one of -mibt, "
		     "-mshstk options to enable CET");
	    }
	  else if (flag_cf_protection == CF_BRANCH)
	    {
	      error ("%<-fcf-protection=branch%> requires CET support "
		     "on this target. Use -mcet or one of -mibt, "
		     "-mshstk options to enable CET");
	    }
	  else if (flag_cf_protection == CF_RETURN)
	    {
	      error ("%<-fcf-protection=return%> requires CET support "
		     "on this target. Use -mcet or one of -mibt, "
		     "-mshstk options to enable CET");
	    }
	  flag_cf_protection = CF_NONE;
	  return false;
	}
      opts->x_flag_cf_protection =
	(cf_protection_level) (opts->x_flag_cf_protection | CF_SET);
    }

  return true;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true, &global_options, &global_options_set);
}

/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */

static char *
ix86_offload_options (void)
{
  if (TARGET_LP64)
    return xstrdup ("-foffload-abi=lp64");
  return xstrdup ("-foffload-abi=ilp32");
}

/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers
     except fixed_regs and registers used for function return value,
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun && cfun->machine->no_caller_saved_registers)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, squash the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }

  /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If AVX512F is disabled, squash the registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

      for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }

  /* If MPX is disabled, squash the registers.  */
  if (! TARGET_MPX)
    for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
      fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}

/* Canonicalize a comparison from one we don't have to one we do have.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (*op0, *op1);
	  *code = (int) scode;
	}
    }
}
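/* Illustrative note (not from the original sources): swap_condition
   mirrors a comparison code across its operands, so for instance
   (gt (float (mem)) (reg)) becomes (lt (reg) (float (mem))) -- the
   same predicate with the register first, which is the operand order
   the ficom pattern expects.  */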
/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr,
			     struct gcc_options *opts)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->prefetch_sse = x86_prefetch_sse;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
  ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
  ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
  ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
  ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
  ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
  ptr->x_ix86_abi = opts->x_ix86_abi;
  ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
  ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
  ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
  ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
  ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
  ptr->x_ix86_incoming_stack_boundary_arg
    = opts->x_ix86_incoming_stack_boundary_arg;
  ptr->x_ix86_pmode = opts->x_ix86_pmode;
  ptr->x_ix86_preferred_stack_boundary_arg
    = opts->x_ix86_preferred_stack_boundary_arg;
  ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
  ptr->x_ix86_regparm = opts->x_ix86_regparm;
  ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
  ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
  ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
  ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
  ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
  ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
  ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
  ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
  ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
  ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}

/* Restore the current options */

static void
ix86_function_specific_restore (struct gcc_options *opts,
				struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask;
  int i;

  /* We don't change -fPIC.  */
  opts->x_flag_pic = flag_pic;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  x86_prefetch_sse = ptr->prefetch_sse;
  opts->x_ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
  opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
  opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
  opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
  opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
  opts->x_ix86_abi = ptr->x_ix86_abi;
  opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
  opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
  opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
  opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
  opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
  opts->x_ix86_incoming_stack_boundary_arg
    = ptr->x_ix86_incoming_stack_boundary_arg;
  opts->x_ix86_pmode = ptr->x_ix86_pmode;
  opts->x_ix86_preferred_stack_boundary_arg
    = ptr->x_ix86_preferred_stack_boundary_arg;
  opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
  opts->x_ix86_regparm = ptr->x_ix86_regparm;
  opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
  opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
  opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
  opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
  opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
  opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
  opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
  opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
  opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
  opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  /* TODO: ix86_cost should be chosen at instruction or function granularity
     so for cold code we use size_cost even in !optimize_size compilation.  */
  if (opts->x_optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    set_ix86_tune_features (ix86_tune, false);
}

/* Adjust target options after streaming them in.  This is mainly about
   reconciling them with global options.  */

static void
ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
{
  /* flag_pic is a global option, but ix86_cmodel is target saved option
     partly computed from flag_pic.  If flag_pic is on, adjust x_ix86_cmodel
     for PIC, or error out.  */
  if (flag_pic)
    switch (ptr->x_ix86_cmodel)
      {
      case CM_SMALL:
	ptr->x_ix86_cmodel = CM_SMALL_PIC;
	break;

      case CM_MEDIUM:
	ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
	break;

      case CM_LARGE:
	ptr->x_ix86_cmodel = CM_LARGE_PIC;
	break;

      case CM_KERNEL:
	error ("code model %s does not support PIC mode", "kernel");
	break;

      default:
	break;
      }
  else
    switch (ptr->x_ix86_cmodel)
      {
      case CM_SMALL_PIC:
	ptr->x_ix86_cmodel = CM_SMALL;
	break;

      case CM_MEDIUM_PIC:
	ptr->x_ix86_cmodel = CM_MEDIUM;
	break;

      case CM_LARGE_PIC:
	ptr->x_ix86_cmodel = CM_LARGE;
	break;

      default:
	break;
      }
}

/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
			  ptr->x_target_flags, ptr->x_ix86_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  gcc_assert (ptr->arch < PROCESSOR_max);
  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch, processor_target_table[ptr->arch].name);

  gcc_assert (ptr->tune < PROCESSOR_max);
  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune, processor_target_table[ptr->tune].name);

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}

/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *opts,
				     struct gcc_options *opts_set,
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("sgx",	OPT_msgx),
    IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
    IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
    IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
    IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
    IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),

    IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
    IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
    IX86_ATTR_ISA ("avx512vl",	OPT_mavx512vl),
    IX86_ATTR_ISA ("avx512bw",	OPT_mavx512bw),
    IX86_ATTR_ISA ("avx512dq",	OPT_mavx512dq),
    IX86_ATTR_ISA ("avx512er",	OPT_mavx512er),
    IX86_ATTR_ISA ("avx512pf",	OPT_mavx512pf),
    IX86_ATTR_ISA ("avx512cd",	OPT_mavx512cd),
    IX86_ATTR_ISA ("avx512f",	OPT_mavx512f),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("sha",	OPT_msha),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("3dnowa",	OPT_m3dnowa),
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
    IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
    IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
    IX86_ATTR_ISA ("adx",	OPT_madx),
    IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
    IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
    IX86_ATTR_ISA ("xsaves",	OPT_mxsaves),
    IX86_ATTR_ISA ("xsavec",	OPT_mxsavec),
    IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),
    IX86_ATTR_ISA ("xsave",	OPT_mxsave),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("cx16",	OPT_mcx16),
    IX86_ATTR_ISA ("sahf",	OPT_msahf),
    IX86_ATTR_ISA ("movbe",	OPT_mmovbe),
    IX86_ATTR_ISA ("crc32",	OPT_mcrc32),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("mwaitx",	OPT_mmwaitx),
    IX86_ATTR_ISA ("clzero",	OPT_mclzero),
    IX86_ATTR_ISA ("pku",	OPT_mpku),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("hle",	OPT_mhle),
    IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
    IX86_ATTR_ISA ("mpx",	OPT_mmpx),
    IX86_ATTR_ISA ("clwb",	OPT_mclwb),
    IX86_ATTR_ISA ("rdpid",	OPT_mrdpid),
    IX86_ATTR_ISA ("gfni",	OPT_mgfni),
    IX86_ATTR_ISA ("ibt",	OPT_mibt),
    IX86_ATTR_ISA ("shstk",	OPT_mshstk),
    IX86_ATTR_ISA ("vaes",	OPT_mvaes),
    IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, opts, opts_set,
						     enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (opts, opts_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    opts->x_target_flags |= mask;
	  else
	    opts->x_target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (opts, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
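/* Illustrative usage sketch (not part of GCC itself): the parser above
   accepts a comma-separated list, with a "no-" prefix to turn an ISA
   flag off, as in GCC's documented target attribute.  For example:  */
#if 0
__attribute__((target ("avx2,no-sse4a")))
static void
vector_kernel (float *dst, const float *src, int n)
{
  /* Compiled as if -mavx2 -mno-sse4a were in effect for this function
     only.  */
  for (int i = 0; i < n; i++)
    dst[i] = src[i] * 2.0f;
}
#endif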
/* Release allocated strings.  */
static void
release_options_strings (char **option_strings)
{
  /* Free up memory allocated to hold the strings */
  for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
    free (option_strings[i]);
}

/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args,
				  struct gcc_options *opts,
				  struct gcc_options *opts_set)
{
  const char *orig_arch_string = opts->x_ix86_arch_string;
  const char *orig_tune_string = opts->x_ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
					     opts_set, &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
      || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
      || opts->x_target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	{
	  opts->x_ix86_arch_string
	    = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);

	  /* If arch= is set, clear all bits in x_ix86_isa_flags,
	     except for ISA_64BIT, ABI_64, ABI_X32, and CODE16.  */
	  opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
				     | OPTION_MASK_ABI_64
				     | OPTION_MASK_ABI_X32
				     | OPTION_MASK_CODE16);
	  opts->x_ix86_isa_flags2 = 0;
	}
      else if (!orig_arch_specified)
	opts->x_ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	opts->x_ix86_tune_string
	  = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
      else if (orig_tune_defaulted)
	opts->x_ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      bool r = ix86_option_override_internal (false, opts, opts_set);
      if (!r)
	{
	  release_options_strings (option_strings);
	  return error_mark_node;
	}

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node (opts);

      opts->x_ix86_arch_string = orig_arch_string;
      opts->x_ix86_tune_string = orig_tune_string;
      opts_set->x_ix86_fpmath = orig_fpmath_set;

      release_options_strings (option_strings);
    }

  return t;
}

/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct gcc_options func_options;
  tree new_target, new_optimize;
  bool ret = true;

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  tree old_optimize = build_optimization_node (&global_options);

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  if (!func_optimize)
    func_optimize = old_optimize;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  /* Initialize func_options to the default before its target options can
     be set.  */
  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  new_target = ix86_valid_target_attribute_tree (args, &func_options,
						 &global_options_set);

  new_optimize = build_optimization_node (&func_options);

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  finalize_options_struct (&func_options);

  return ret;
}


/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;

  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
    ret = false;

  /* See if arch, tune, etc. are the same.  */
  else if (caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get
		     (cgraph_node::get (callee))->fp_expressions))
    ret = false;

  else if (caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
5736 static GTY(()) tree ix86_previous_fndecl
;
5738 /* Set targets globals to the default (or current #pragma GCC target
5739 if active). Invalidate ix86_previous_fndecl cache. */
5742 ix86_reset_previous_fndecl (void)
5744 tree new_tree
= target_option_current_node
;
5745 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
5746 if (TREE_TARGET_GLOBALS (new_tree
))
5747 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
5748 else if (new_tree
== target_option_default_node
)
5749 restore_target_globals (&default_target_globals
);
5751 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
5752 ix86_previous_fndecl
= NULL_TREE
;
/* Set the func_type field from the function FNDECL.  */

static void
ix86_set_func_type (tree fndecl)
{
  if (cfun->machine->func_type == TYPE_UNKNOWN)
    {
      if (lookup_attribute ("interrupt",
                            TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
        {
          if (ix86_function_naked (fndecl))
            error_at (DECL_SOURCE_LOCATION (fndecl),
                      "interrupt and naked attributes are not compatible");

          int nargs = 0;
          for (tree arg = DECL_ARGUMENTS (fndecl);
               arg;
               arg = TREE_CHAIN (arg))
            nargs++;
          cfun->machine->no_caller_saved_registers = true;
          cfun->machine->func_type
            = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;

          ix86_optimize_mode_switching[X86_DIRFLAG] = 1;

          /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument.  */
          if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
            sorry ("Only DWARF debug format is supported for interrupt "
                   "service routine.");
        }
      else
        {
          cfun->machine->func_type = TYPE_NORMAL;
          if (lookup_attribute ("no_caller_saved_registers",
                                TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
            cfun->machine->no_caller_saved_registers = true;
        }
    }
}
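
/* Illustrative usage sketch (not part of the GCC sources): with one
   pointer argument a handler is classified TYPE_INTERRUPT, with an
   additional error-code word TYPE_EXCEPTION, matching the nargs == 2
   test above:

     struct interrupt_frame;

     __attribute__ ((interrupt))
     void isr (struct interrupt_frame *frame)      // TYPE_INTERRUPT
     {
       (void) frame;
     }

     __attribute__ ((interrupt))
     void fault (struct interrupt_frame *frame,
                 uword_t error_code)               // TYPE_EXCEPTION
     {
       (void) frame;
       (void) error_code;
     }

   uword_t stands for the unsigned machine-word type used in the
   documented x86 interrupt attribute examples.  */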
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == ix86_previous_fndecl)
    {
      /* There may be 2 function bodies for the same function FNDECL,
         one is extern inline and one isn't.  Call ix86_set_func_type
         to set the func_type field.  */
      if (fndecl != NULL_TREE)
        ix86_set_func_type (fndecl);
      return;
    }

  tree old_tree;
  if (ix86_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
  else
    old_tree = target_option_default_node;

  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
        ix86_reset_previous_fndecl ();
      return;
    }

  ix86_set_func_type (fndecl);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  if (new_tree == NULL_TREE)
    new_tree = target_option_default_node;

  if (old_tree != new_tree)
    {
      cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
      if (TREE_TARGET_GLOBALS (new_tree))
        restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
      else if (new_tree == target_option_default_node)
        restore_target_globals (&default_target_globals);
      else
        TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }
  ix86_previous_fndecl = fndecl;

  static bool prev_no_caller_saved_registers;

  /* 64-bit MS and SYSV ABI have different set of call used registers.
     Avoid expensive re-initialization of init_regs each time we switch
     function context.  */
  if (TARGET_64BIT
      && (call_used_regs[SI_REG]
          == (cfun->machine->call_abi == MS_ABI)))
    reinit_regs ();
  /* Need to re-initialize init_regs if caller-saved registers are
     changed.  */
  else if (prev_no_caller_saved_registers
           != cfun->machine->no_caller_saved_registers)
    reinit_regs ();

  if (cfun->machine->func_type != TYPE_NORMAL
      || cfun->machine->no_caller_saved_registers)
    {
      /* Don't allow MPX, SSE, MMX nor x87 instructions since they
         may change processor state.  */
      const char *isa;
      if (TARGET_MPX)
        isa = "MPX";
      else if (TARGET_SSE)
        isa = "SSE";
      else if (TARGET_MMX)
        isa = "MMX/3Dnow";
      else if (TARGET_80387)
        isa = "80387";
      else
        isa = NULL;
      if (isa != NULL)
        {
          if (cfun->machine->func_type != TYPE_NORMAL)
            sorry ("%s instructions aren't allowed in %s service routine",
                   isa, (cfun->machine->func_type == TYPE_EXCEPTION
                         ? "exception" : "interrupt"));
          else
            sorry ("%s instructions aren't allowed in function with "
                   "no_caller_saved_registers attribute", isa);
          /* Don't issue the same error twice.  */
          cfun->machine->func_type = TYPE_NORMAL;
          cfun->machine->no_caller_saved_registers = false;
        }
    }

  prev_no_caller_saved_registers
    = cfun->machine->no_caller_saved_registers;
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  if (exp == NULL_TREE)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  /* Automatic variables are never large data.  */
  if (VAR_P (exp) && !is_global_var (exp))
    return false;

  if (VAR_P (exp) && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)
        return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  Also,
         int_size_in_bytes returns -1 if size can vary or is larger than
         an integer, in which case also it is safer to assume that it goes in
         large data.  */
      if (size <= 0 || size > ix86_section_threshold)
        return true;
    }

  return false;
}
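
/* Illustrative example (not part of the GCC sources): compiling with
   -mcmodel=medium -mlarge-data-threshold=16, a 64-byte global

     static char buf[64];

   exceeds ix86_section_threshold and is therefore placed in the
   large .lbss section, while an 8-byte global stays in ordinary
   .bss.  Naming the section ".ldata" or ".lbss" by hand has the same
   effect regardless of size, per the explicit section check above.  */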
/* i386-specific section flag to mark large sections.  */
#define SECTION_LARGE SECTION_MACH_DEP

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
                           unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
        {
        case SECCAT_DATA:
          sname = ".ldata";
          break;
        case SECCAT_DATA_REL:
          sname = ".ldata.rel";
          break;
        case SECCAT_DATA_REL_LOCAL:
          sname = ".ldata.rel.local";
          break;
        case SECCAT_DATA_REL_RO:
          sname = ".ldata.rel.ro";
          break;
        case SECCAT_DATA_REL_RO_LOCAL:
          sname = ".ldata.rel.ro.local";
          break;
        case SECCAT_BSS:
          sname = ".lbss";
          flags |= SECTION_BSS;
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          sname = ".lrodata";
          flags &= ~SECTION_WRITE;
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for best.  */
          break;
        }
      if (sname)
        {
          /* We might get called with string constants, but get_named_section
             doesn't like them as they are not DECLs.  Also, we need to set
             flags in that case.  */
          if (!DECL_P (decl))
            return get_section (sname, flags, NULL);
          return get_named_section (decl, sname, reloc);
        }
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Select a set of attributes for section NAME based on the properties
   of DECL and whether or not RELOC indicates that DECL's initializer
   might contain runtime relocations.  */

static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (ix86_in_large_data_p (decl))
    flags |= SECTION_LARGE;

  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
          || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;

  if (strcmp (name, ".lbss") == 0
      || strncmp (name, ".lbss.", 5) == 0
      || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
    flags |= SECTION_BSS;

  return flags;
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
        {
        case SECCAT_DATA:
        case SECCAT_DATA_REL:
        case SECCAT_DATA_REL_LOCAL:
        case SECCAT_DATA_REL_RO:
        case SECCAT_DATA_REL_RO_LOCAL:
          prefix = one_only ? ".ld" : ".ldata";
          break;
        case SECCAT_BSS:
          prefix = one_only ? ".lb" : ".lbss";
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          prefix = one_only ? ".lr" : ".lrodata";
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for best.  */
          break;
        }
      if (prefix)
        {
          const char *name, *linkonce;
          char *string;

          name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
          name = targetm.strip_name_encoding (name);

          /* If we're using one_only, then there needs to be a .gnu.linkonce
             prefix to the section name.  */
          linkonce = one_only ? ".gnu.linkonce" : "";

          string = ACONCAT ((linkonce, prefix, ".", name, NULL));

          set_decl_section_name (decl, string);
          return;
        }
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP

#ifndef LARGECOMM_SECTION_ASM_OP
#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
#endif

/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
   large objects.  */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
                             const char *name, unsigned HOST_WIDE_INT size,
                             int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
           size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
                        unsigned HOST_WIDE_INT size, int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  bool bind_global = decl && !targetm.binds_local_p (decl);

  if (ix86_function_naked (current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->no_caller_saved_registers)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                           cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
        return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
         disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
          && ix86_function_type_abi (type) == SYSV_ABI)
        return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
         call-clobbered register for the address of the target function.
         Make sure that all such registers are not used for passing
         parameters.  Note that DLLIMPORT functions and call to global
         function via GOT slot are indirect.  */
      if (!decl
          || (bind_global && flag_pic && !flag_plt)
          || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
        {
          /* Check if regparm >= 3 since arg_reg_available is set to
             false if regparm == 0.  If regparm is 1 or 2, there is
             always a call-clobbered register available.

             ??? The symbol indirect call doesn't need a call-clobbered
             register.  But we don't know if this is a symbol indirect
             call or not here.  */
          if (ix86_function_regparm (type, NULL) >= 3
              && !cfun->machine->arg_reg_available)
            return false;
        }
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
                             bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and regparm attributes are not compatible");
        }

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        {
          error ("regparm and thiscall attributes are not compatible");
        }

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qE attribute requires an integer constant argument",
                   name);
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
        {
          warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
                   name, REGPARM_MAX);
          *no_add_attrs = true;
        }

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
           && TREE_CODE (*node) != METHOD_TYPE)
          || ix86_function_type_abi (*node) != MS_ABI)
        warning (OPT_Wattributes, "%qE attribute ignored",
                 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and stdcall attributes are not compatible");
        }
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and regparm attributes are not compatible");
        }
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and thiscall attributes are not compatible");
        }
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and fastcall attributes are not compatible");
        }
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and thiscall attributes are not compatible");
        }
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        {
          error ("cdecl and thiscall attributes are not compatible");
        }
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
        warning (OPT_Wattributes, "%qE attribute is used for non-class method",
                 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and thiscall attributes are not compatible");
        }
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and thiscall attributes are not compatible");
        }
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        {
          error ("cdecl and thiscall attributes are not compatible");
        }
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
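
/* Illustrative usage sketch (not part of the GCC sources): the checks
   above reject contradictory combinations such as

     void __attribute__ ((fastcall, stdcall)) f (int);   // error

   while a consistent declaration like

     void __attribute__ ((fastcall, sseregparm)) g (float);

   is accepted: fastcall passes the first two integer arguments in
   %ecx/%edx, and sseregparm routes float arguments through SSE
   registers.  */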
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
                                  int flags, bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
        ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
        ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
        ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
        ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
        {
          if (lookup_attribute ("regparm", attrs))
            ret |= IX86_CALLCVT_REGPARM;
          if (lookup_attribute ("sseregparm", attrs))
            ret |= IX86_CALLCVT_SSEREGPARM;
        }

      if (IX86_BASE_CALLCVT(ret) != 0)
        return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
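
/* Illustrative example (not part of the GCC sources): for

     void __attribute__ ((stdcall, regparm (2))) h (int, int);

   the returned mask is IX86_CALLCVT_STDCALL | IX86_CALLCVT_REGPARM,
   and under -mrtd a plain non-variadic prototype defaults to
   IX86_CALLCVT_STDCALL as implemented above.  */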
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
            ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
          return regparm;
        }
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
        target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
         checking just the optimize flag here would mean that with
         __attribute__((optimize (...))) the caller could use the regparm
         convention and the callee not, or vice versa.  Instead look at
         whether the callee is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
          && !(profile_flag && !flag_fentry))
        {
          cgraph_local_info *i = &target->local;
          if (i && i->local && i->can_change_signature)
            {
              int local_regparm, globals = 0, regno;

              /* Make sure no regparm register is taken by a
                 fixed register variable.  */
              for (local_regparm = 0; local_regparm < REGPARM_MAX;
                   local_regparm++)
                if (fixed_regs[local_regparm])
                  break;

              /* We don't want to use regparm(3) for nested functions as
                 these use a static chain pointer in the third argument.  */
              if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
                local_regparm = 2;

              /* Save a register for the split stack.  */
              if (flag_split_stack)
                {
                  if (local_regparm == 3)
                    local_regparm = 2;
                  else if (local_regparm == 2
                           && DECL_STATIC_CHAIN (target->decl))
                    local_regparm = 1;
                }

              /* Each fixed register usage increases register pressure,
                 so fewer registers should be used for argument passing.
                 This functionality can be overridden by an explicit
                 regparm value.  */
              for (regno = AX_REG; regno <= DI_REG; regno++)
                if (fixed_regs[regno])
                  globals++;

              local_regparm
                = globals < local_regparm ? local_regparm - globals : 0;

              if (local_regparm > regparm)
                regparm = local_regparm;
            }
        }
    }

  return regparm;
}
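
/* Illustrative example (not part of the GCC sources): a local
   function whose address never escapes can be promoted to register
   passing even without an explicit attribute:

     static int add3 (int a, int b, int c) { return a + b + c; }

   compiled with -O2 for 32-bit x86, the local-call path above may
   raise regparm to 3, so a, b and c arrive in %eax, %edx and %ecx.  */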
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by error.  This is used in situations where we imply
   the SSE calling convention but the function is called from another
   function with SSE disabled.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
        {
          if (warn)
            {
              if (decl)
                error ("calling %qD with attribute sseregparm without "
                       "SSE/SSE2 enabled", decl);
              else
                error ("calling %qT with attribute sseregparm without "
                       "SSE/SSE2 enabled", type);
            }
          return 0;
        }

      return 2;
    }

  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      cgraph_local_info *i = &target->local;
      if (i && i->local && i->can_change_signature)
        {
          /* Refuse to produce wrong code when local function with SSE enabled
             is called from SSE disabled function.
             FIXME: We need a way to detect these cases cross-ltrans partition
             and avoid using SSE calling conventions on local functions called
             from function with SSE disabled.  For now at least delay the
             warning until we know we are going to produce wrong code.  */
          if (!TARGET_SSE && warn)
            return -1;
          return TARGET_SSE2_P (target_opts_for_fn (target->decl)
                                ->x_ix86_isa_flags) ? 2 : 1;
        }
    }
  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
                               TYPE_ATTRIBUTES (fntype));
      if (attr)
        return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
         return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
        return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
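
/* Illustrative usage sketch (not part of the GCC sources):

     struct big { int a[4]; };
     __attribute__ ((callee_pop_aggregate_return (0)))
     struct big get_big (void);

   the argument 0 requests that the caller pop the hidden return-slot
   pointer; the lookup above then returns true, so ix86_return_pops_args
   below will not count the fake struct-return argument as
   callee-popped.  */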
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
               | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
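
/* Illustrative example (not part of the GCC sources): for the 32-bit
   function

     void __attribute__ ((stdcall)) f (int a, int b);

   the calling convention is IX86_CALLCVT_STDCALL and f is not
   variadic, so this hook returns SIZE (8 here) and the callee pops
   its own arguments with "ret $8".  */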
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
         is irrelevant for matching constraints.  */
      if (UNARY_P (op))
        op = XEXP (op, 0);

      if (SUBREG_P (op))
        {
          if (REG_P (SUBREG_REG (op))
              && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
            offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
                                          GET_MODE (SUBREG_REG (op)),
                                          SUBREG_BYTE (op),
                                          GET_MODE (op));
          op = SUBREG_REG (op);
        }

      if (!(REG_P (op) && HARD_REGISTER_P (op)))
        continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
        {
          if (!TEST_BIT (preferred, j))
            continue;
          if (op_alt[i].anything_ok
              || (op_alt[i].matches != -1
                  && operands_match_p
                       (recog_data.operand[i],
                        recog_data.operand[op_alt[i].matches]))
              || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
            {
              win = true;
              break;
            }
        }

      if (!win)
        return false;
    }

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
                                     : HOST_WIDE_INT_C (0x7fff8000))
                     : (HOST_WIDE_INT_1 << 29);
}
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  if (TARGET_MPX && BND_REGNO_P (regno))
    return true;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
                || (TARGET_MMX && MMX_REGNO_P (regno)
                    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
                || (TARGET_SSE && SSE_REGNO_P (regno)
                    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_SSE && SSE_REGNO_P (regno)
      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
    return true;

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = 0; i < (call_abi == MS_ABI
                   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class);
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
   specifying the call abi used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  if (abi == SYSV_ABI
      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      static int warned;
      if (TARGET_X32 && !warned)
        {
          error ("X32 does not support ms_abi attribute");
          warned = 1;
        }

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
           && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
}
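
/* Illustrative usage sketch (not part of the GCC sources): on x86-64
   GNU/Linux, where ix86_abi is SYSV_ABI, a single function can opt
   into the Microsoft convention:

     void __attribute__ ((ms_abi)) win_callback (int a, int b);

   ix86_function_type_abi returns MS_ABI for its type, so A and B are
   passed in %rcx and %rdx rather than %rdi and %rsi.  */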
/* Returns value SYSV_ABI, MS_ABI dependent on cfun,
   specifying the call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  return cfun ? cfun->machine->call_abi : ix86_abi;
}

static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
        error_at (DECL_SOURCE_LOCATION (fn),
                  "ms_hook_prologue is not compatible with nested function");
      else
        return true;
    }
  return false;
}

static bool
ix86_function_naked (const_tree fn)
{
  if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    return true;

  return false;
}
7012 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
7015 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
7019 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
7020 unsigned int filler_cc
= 0xcccccccc;
7022 for (i
= 0; i
< filler_count
; i
+= 4)
7023 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
7026 #ifdef SUBTARGET_ASM_UNWIND_INIT
7027 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
7030 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
7032 /* Output magic byte marker, if hot-patch attribute is set. */
7037 /* leaq [%rsp + 0], %rsp */
7038 fputs (ASM_BYTE
"0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
7043 /* movl.s %edi, %edi
7045 movl.s %esp, %ebp */
7046 fputs (ASM_BYTE
"0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file
);
/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}

/* Return 1 if pseudo register should be created and used to hold
   GOT address for PIC code.  */
bool
ix86_use_pseudo_pic_reg (void)
{
  if ((TARGET_64BIT
       && (ix86_cmodel == CM_SMALL_PIC
           || TARGET_PECOFF))
      || !flag_pic)
    return false;
  return true;
}
/* Initialize large model PIC register.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
                            pic_offset_table_rtx, tmp_reg));
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
/* Create and initialize PIC register if required.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
        ix86_init_large_pic_reg (R11_REG);
      else
        emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /* If there is future mcount call in the function it is more profitable
         to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
                ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
                : pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
        emit_move_insn (pic_offset_table_rtx, reg);
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = get_insns ();
  end_sequence ();

  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,      /* tree ptr for function decl */
                      rtx libname,      /* SYMBOL_REF of library name or 0 */
                      tree fndecl,
                      int caller)
{
  struct cgraph_local_info *i = NULL;
  struct cgraph_node *target = NULL;

  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    {
      target = cgraph_node::get (fndecl);
      if (target)
        {
          target = target->function_symbol ();
          i = cgraph_node::local_info (target->decl);
          cum->call_abi = ix86_function_abi (target->decl);
        }
      else
        cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    cum->call_abi = ix86_function_type_abi (fntype);

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
                    ? X86_64_REGPARM_MAX
                    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
        {
          cum->sse_nregs = (cum->call_abi == SYSV_ABI
                            ? X86_64_SSE_REGPARM_MAX
                            : X86_64_MS_SSE_REGPARM_MAX);
        }
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive,
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  cum->maybe_vaarg = (fntype
                      ? (!prototype_p (fntype) || stdarg_p (fntype))
                      : !libname);

  cum->bnd_regno = FIRST_BND_REG;
  cum->bnds_in_bt = 0;
  cum->force_bnd_pass = 0;

  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
        {
          if (argtype == error_mark_node || VOID_TYPE_P (argtype))
            break;
          if (TYPE_EMPTY_P (argtype))
            seen_empty_type = true;
          else if (seen_empty_type)
            {
              cum->warn_empty = true;
              break;
            }
        }
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
         in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
        {
          cum->nregs = 0;
          /* Since in 32-bit, variable arguments are always passed on
             stack, there is scratch register available for indirect
             sibcall.  */
          cfun->machine->arg_reg_available = true;
          cum->sse_nregs = 0;
          cum->mmx_nregs = 0;
          cum->warn_avx512f = false;
          cum->warn_avx = false;
          cum->warn_sse = false;
          cum->warn_mmx = false;
          return;
        }

      /* Use ecx and edx registers if function has fastcall attribute,
         else look for regparm information.  */
      if (fntype)
        {
          unsigned int ccvt = ix86_get_callcvt (fntype);
          if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
            {
              cum->nregs = 1;
              cum->fastcall = 1; /* Same first register as in fastcall.  */
            }
          else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
            {
              cum->nregs = 2;
              cum->fastcall = 1;
            }
          else
            cum->nregs = ix86_function_regparm (fntype, fndecl);
        }

      /* Set up the number of SSE registers used for passing SFmode
         and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.

   If IN_RETURN is true, warn ABI change if the vector mode isn't
   available for function return value.  */

static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
                   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          /* There are no XFmode vector modes.  */
          if (innermode == XFmode)
            return mode;

          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          FOR_EACH_MODE_FROM (mode, mode)
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              {
                if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
                  {
                    static bool warnedavx512f;
                    static bool warnedavx512f_ret;

                    if (cum && cum->warn_avx512f && !warnedavx512f)
                      {
                        if (warning (OPT_Wpsabi, "AVX512F vector argument "
                                     "without AVX512F enabled changes the ABI"))
                          warnedavx512f = true;
                      }
                    else if (in_return && !warnedavx512f_ret)
                      {
                        if (warning (OPT_Wpsabi, "AVX512F vector return "
                                     "without AVX512F enabled changes the ABI"))
                          warnedavx512f_ret = true;
                      }

                    return TYPE_MODE (type);
                  }
                else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
                  {
                    static bool warnedavx;
                    static bool warnedavx_ret;

                    if (cum && cum->warn_avx && !warnedavx)
                      {
                        if (warning (OPT_Wpsabi, "AVX vector argument "
                                     "without AVX enabled changes the ABI"))
                          warnedavx = true;
                      }
                    else if (in_return && !warnedavx_ret)
                      {
                        if (warning (OPT_Wpsabi, "AVX vector return "
                                     "without AVX enabled changes the ABI"))
                          warnedavx_ret = true;
                      }

                    return TYPE_MODE (type);
                  }
                else if (((size == 8 && TARGET_64BIT) || size == 16)
                         && !TARGET_SSE
                         && !TARGET_IAMCU)
                  {
                    static bool warnedsse;
                    static bool warnedsse_ret;

                    if (cum && cum->warn_sse && !warnedsse)
                      {
                        if (warning (OPT_Wpsabi, "SSE vector argument "
                                     "without SSE enabled changes the ABI"))
                          warnedsse = true;
                      }
                    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
                      {
                        if (warning (OPT_Wpsabi, "SSE vector return "
                                     "without SSE enabled changes the ABI"))
                          warnedsse_ret = true;
                      }
                  }
                else if ((size == 8 && !TARGET_64BIT)
                         && (!cfun
                             || cfun->machine->func_type == TYPE_NORMAL)
                         && !TARGET_MMX
                         && !TARGET_IAMCU)
                  {
                    static bool warnedmmx;
                    static bool warnedmmx_ret;

                    if (cum && cum->warn_mmx && !warnedmmx)
                      {
                        if (warning (OPT_Wpsabi, "MMX vector argument "
                                     "without MMX enabled changes the ABI"))
                          warnedmmx = true;
                      }
                    else if (in_return && !warnedmmx_ret)
                      {
                        if (warning (OPT_Wpsabi, "MMX vector return "
                                     "without MMX enabled changes the ABI"))
                          warnedmmx_ret = true;
                      }
                  }
                return mode;
              }

          gcc_unreachable ();
        }
    }

  return mode;
}
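
/* Illustrative example (not part of the GCC sources): compiling

     typedef double v4df __attribute__ ((vector_size (32)));
     void f (v4df x);

   without -mavx takes the size == 32 branch above, so GCC emits the
   -Wpsabi warning "AVX vector argument without AVX enabled changes
   the ABI" and falls back to TYPE_MODE for the argument.  */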
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
                     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
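
/* Worked example (illustrative, not part of the GCC sources): for

     union u { int i; float f; };

   the int member classifies its eightbyte as X86_64_INTEGERSI_CLASS
   and the float member as X86_64_SSESF_CLASS; rule #4 above merges
   the two to X86_64_INTEGERSI_CLASS, so the union is passed in an
   integer register.  */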
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 512 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (machine_mode mode, const_tree type,
                   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
        return 0;

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal memory class, so handle it as a special case.  */
      if (!words)
        {
          classes[0] = X86_64_NO_CLASS;
          return 1;
        }

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
          /* And now merge the fields of structure.  */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  /* Bitfields are always classified as integer.  Handle them
                     early, since later code would consider them to be
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                    {
                      for (i = (int_bit_position (field)
                                + (bit_offset % 64)) / 8 / 8;
                           i < ((int_bit_position (field) + (bit_offset % 64))
                                + tree_to_shwi (DECL_SIZE (field))
                                + 63) / 8 / 8; i++)
                        classes[i]
                          = merge_classes (X86_64_INTEGER_CLASS,
                                           classes[i]);
                    }
                  else
                    {
                      int pos;

                      type = TREE_TYPE (field);

                      /* Flexible array member is ignored.  */
                      if (TYPE_MODE (type) == BLKmode
                          && TREE_CODE (type) == ARRAY_TYPE
                          && TYPE_SIZE (type) == NULL_TREE
                          && TYPE_DOMAIN (type) != NULL_TREE
                          && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
                              == NULL_TREE))
                        {
                          static bool warned;

                          if (!warned && warn_psabi)
                            {
                              warned = true;
                              inform (input_location,
                                      "the ABI of passing struct with"
                                      " a flexible array member has"
                                      " changed in GCC 4.4");
                            }
                          continue;
                        }
                      num = classify_argument (TYPE_MODE (type), type,
                                               subclasses,
                                               (int_bit_position (field)
                                                + bit_offset) % 512);
                      if (!num)
                        return 0;
                      pos = (int_bit_position (field)
                             + (bit_offset % 64)) / 8 / 8;
                      for (i = 0; i < num && (i + pos) < words; i++)
                        classes[i + pos]
                          = merge_classes (subclasses[i], classes[i + pos]);
                    }
                }
            }
          break;

        case ARRAY_TYPE:
          /* Arrays are handled as small records.  */
          {
            int num;
            num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                     TREE_TYPE (type), subclasses, bit_offset);
            if (!num)
              return 0;

            /* The partial classes are now full classes.  */
            if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
              subclasses[0] = X86_64_SSE_CLASS;
            if (subclasses[0] == X86_64_INTEGERSI_CLASS
                && !((bit_offset % 64) == 0 && bytes == 4))
              subclasses[0] = X86_64_INTEGER_CLASS;

            for (i = 0; i < words; i++)
              classes[i] = subclasses[i % num];

            break;
          }
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          /* Unions are similar to RECORD_TYPE but offset is always 0.  */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                           TREE_TYPE (field), subclasses,
                                           bit_offset);
                  if (!num)
                    return 0;
                  for (i = 0; i < num && i < words; i++)
                    classes[i] = merge_classes (subclasses[i], classes[i]);
                }
            }
          break;

        default:
          gcc_unreachable ();
        }

      if (words > 2)
        {
          /* When size > 16 bytes, if the first one isn't
             X86_64_SSE_CLASS or any other ones aren't
             X86_64_SSEUP_CLASS, everything should be passed in
             memory.  */
          if (classes[0] != X86_64_SSE_CLASS)
            return 0;

          for (i = 1; i < words; i++)
            if (classes[i] != X86_64_SSEUP_CLASS)
              return 0;
        }

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
        {
          /* If one class is MEMORY, everything should be passed in
             memory.  */
          if (classes[i] == X86_64_MEMORY_CLASS)
            return 0;

          /* The X86_64_SSEUP_CLASS should be always preceded by
             X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
          if (classes[i] == X86_64_SSEUP_CLASS
              && classes[i - 1] != X86_64_SSE_CLASS
              && classes[i - 1] != X86_64_SSEUP_CLASS)
            {
              /* The first one should never be X86_64_SSEUP_CLASS.  */
              gcc_assert (i != 0);
              classes[i] = X86_64_SSE_CLASS;
            }

          /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
             everything should be passed in memory.  */
          if (classes[i] == X86_64_X87UP_CLASS
              && (classes[i - 1] != X86_64_X87_CLASS))
            {
              static bool warned;

              /* The first one should never be X86_64_X87UP_CLASS.  */
              gcc_assert (i != 0);
              if (!warned && warn_psabi)
                {
                  warned = true;
                  inform (input_location,
                          "the ABI of passing union with long double"
                          " has changed in GCC 4.4");
                }
              return 0;
            }
        }
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
        mode_alignment = 128;
      else if (mode == XCmode)
        mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
        mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
        return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_UNIT_SIZE (mode) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case E_SDmode:
    case E_DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    case E_CSImode:
    case E_CHImode:
    case E_CQImode:
      {
        int size = bit_offset + (int) GET_MODE_BITSIZE (mode);

        /* Analyze last 128 bits only.  */
        size = (size - 1) & 0x7f;

        if (size < 32)
          {
            classes[0] = X86_64_INTEGERSI_CLASS;
            return 1;
          }
        else if (size < 64)
          {
            classes[0] = X86_64_INTEGER_CLASS;
            return 1;
          }
        else if (size < 64+32)
          {
            classes[0] = X86_64_INTEGER_CLASS;
            classes[1] = X86_64_INTEGERSI_CLASS;
            return 2;
          }
        else if (size < 64+64)
          {
            classes[0] = classes[1] = X86_64_INTEGER_CLASS;
            return 2;
          }
        else
          gcc_unreachable ();
      }
    case E_CDImode:
    case E_TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case E_COImode:
    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case E_CTImode:
      return 0;
    case E_SFmode:
      if (!(bit_offset % 64))
        classes[0] = X86_64_SSESF_CLASS;
      else
        classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case E_XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case E_TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
        return 1;
      else
        {
          static bool warned;

          if (!warned && warn_psabi)
            {
              warned = true;
              inform (input_location,
                      "the ABI of passing structure with complex float"
                      " member has changed in GCC 4.4");
            }
          classes[1] = X86_64_SSESF_CLASS;
          return 2;
        }
    case E_DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case E_XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case E_TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case E_V8DFmode:
    case E_V16SFmode:
    case E_V8DImode:
    case E_V16SImode:
    case E_V32HImode:
    case E_V64QImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      classes[4] = X86_64_SSEUP_CLASS;
      classes[5] = X86_64_SSEUP_CLASS;
      classes[6] = X86_64_SSEUP_CLASS;
      classes[7] = X86_64_SSEUP_CLASS;
      return 8;
    case E_V4SFmode:
    case E_V4SImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V2DFmode:
    case E_V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_V1TImode:
    case E_V1DImode:
    case E_V2SFmode:
    case E_V2SImode:
    case E_V4HImode:
    case E_V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_BLKmode:
    case E_VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
        return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
      else
        classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
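
/* Worked example (illustrative, not part of the GCC sources): for

     struct s { double d; int i; };

   the first eightbyte classifies as X86_64_SSEDF_CLASS and the second
   as X86_64_INTEGERSI_CLASS, so classify_argument returns 2 and the
   struct travels in one SSE register and one integer register, as
   construct_container below will arrange.  */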
/* Examine the argument and return the number of registers required in each
   class.  Return true iff parameter should be passed in memory.  */

static bool
examine_argument (machine_mode mode, const_tree type, int in_return,
                  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;

  if (!n)
    return true;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        (*int_nregs)++;
        break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        (*sse_nregs)++;
        break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
        break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
        if (!in_return)
          return true;
        break;
      case X86_64_MEMORY_CLASS:
        gcc_unreachable ();
      }

  return false;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
			&needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    {
	      unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
	      if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		/* We've requested a size (e.g. 24 bytes) for which
		   there is no integer mode.  Use DImode.  */
		tmpmode = DImode;
	    }
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i * 8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    case 8:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS
			  && regclass[4] == X86_64_SSEUP_CLASS
			  && regclass[5] == X86_64_SSEUP_CLASS
			  && regclass[6] == X86_64_SSEUP_CLASS
			  && regclass[7] == X86_64_SSEUP_CLASS);
	      tmpmode = XImode;
	      i += 7;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos * 8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];

  return ret;
}
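/* Illustrative example (added commentary, not from the original source):
   for the struct { double d; int i; } argument described above,
   construct_container builds roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		  (expr_list (reg:SI di)   (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, each pairing a hard register with
   the byte offset of that eightbyte within the argument.  */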
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   Return the number of integer registers advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    /* ... case labels for TImode and the SSE/MMX vector modes ... */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    /* ... case labels for the MMX vector modes ... */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return res;
}
static int
function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
  if (!named && (VALID_AVX512F_REG_MODE (mode)
		 || VALID_AVX256_REG_MODE (mode)))
    return 0;

  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
      return int_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = ROUND_UP (cum->words, align);
      cum->words += words;
      return 0;
    }
}
static int
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
      return 1;
    }
  return 0;
}
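/* Added commentary (assumption, based on the Windows x64 software
   convention): in the MS ABI every parameter, integer or floating,
   consumes one of the four register slots RCX/RDX/R8/R9 (XMM0-XMM3
   for floats), which is why this advance routine needs only a single
   nregs/regno pair plus a byte-size check; anything whose size is not
   1, 2, 4 or 8 bytes was already forced indirect by
   ix86_pass_by_reference below.  */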
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;
  int nregs;

  /* The argument of interrupt handler is a special case and is
     handled in ix86_function_arg.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    return;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = CEIL (bytes, UNITS_PER_WORD);

  if (type)
    mode = type_natural_mode (type, NULL, false);

  if ((type && POINTER_BOUNDS_TYPE_P (type))
      || POINTER_BOUNDS_MODE_P (mode))
    {
      /* If we pass bounds in BT then just update the remaining bounds
	 count.  */
      if (cum->bnds_in_bt)
	{
	  cum->bnds_in_bt--;
	  return;
	}

      /* Update the remaining number of bounds to force.  */
      if (cum->force_bnd_pass)
	cum->force_bnd_pass--;

      cum->bnd_regno++;

      return;
    }

  /* The first arg not going to Bounds Tables resets this counter.  */
  cum->bnds_in_bt = 0;
  /* For unnamed args we always pass bounds to avoid bounds mess when
     passed and received types do not match.  If bounds do not follow
     unnamed arg, still pretend required number of bounds were passed.  */
  if (cum->force_bnd_pass)
    {
      cum->bnd_regno += cum->force_bnd_pass;
      cum->force_bnd_pass = 0;
    }

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	nregs = function_arg_advance_ms_64 (cum, bytes, words);
      else
	nregs = function_arg_advance_64 (cum, mode, type, words, named);
    }
  else
    nregs = function_arg_advance_32 (cum, mode, type, bytes, words);

  /* For stdarg we expect bounds to be passed for each value passed
     in register.  */
  if (cum->stdarg)
    cum->force_bnd_pass = nregs;
  /* For pointers passed in memory we expect bounds passed in Bounds
     Table.  */
  if (!nregs)
    {
      /* Track if there are outgoing arguments on stack.  */
      if (cum->caller)
	cfun->machine->outgoing_args_on_stack = true;

      cum->bnds_in_bt = chkp_type_bounds_count (type);
    }
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    /* ... case labels for the 16-byte SSE vector modes ... */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    /* ... case labels for the 256- and 512-bit AVX vector modes ... */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    /* ... case labels for the MMX vector modes ... */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    /* ... case labels for the 256- and 512-bit AVX vector modes ... */
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		    machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
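/* Illustrative example (added commentary, not from the original source):
   for an unnamed double in MS-ABI argument slot 1 of a varargs call,
   the PARALLEL built above says "this value lives in both %xmm1 and
   %rdx", roughly

       (parallel:DF [(expr_list (reg:DF xmm1) (const_int 0))
		     (expr_list (reg:DF rdx)  (const_int 0))])

   so a callee that fetches the slot as an integer register still sees
   the bits.  */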
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (type != NULL_TREE);
      if (POINTER_TYPE_P (type))
	{
	  /* This is the pointer argument.  */
	  gcc_assert (TYPE_MODE (type) == Pmode);
	  /* It is at -WORD(AP) in the current frame in interrupt and
	     exception handlers.  */
	  arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
	}
      else
	{
	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
		      && TREE_CODE (type) == INTEGER_TYPE
		      && TYPE_MODE (type) == word_mode);
	  /* The error code is the word-mode integer argument at
	     -2 * WORD(AP) in the current frame of the exception
	     handler.  */
	  arg = gen_rtx_MEM (word_mode,
			     plus_constant (Pmode,
					    arg_pointer_rtx,
					    -2 * UNITS_PER_WORD));
	}
      return arg;
    }

  /* All pointer bounds arguments are handled separately here.  */
  if ((type && POINTER_BOUNDS_TYPE_P (type))
      || POINTER_BOUNDS_MODE_P (mode))
    {
      /* Return NULL if bounds are forced to go in Bounds Table.  */
      if (cum->bnds_in_bt)
	arg = NULL;
      /* Return the next available bound reg if any.  */
      else if (cum->bnd_regno <= LAST_BND_REG)
	arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
      /* Return the next special slot number otherwise.  */
      else
	arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);

      return arg;
    }

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum, false);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
      else
	arg = function_arg_64 (cum, mode, omode, type, named);
    }
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (arg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
			const_tree type, bool)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* Bounds are never passed by reference.  */
  if ((type && POINTER_BOUNDS_TYPE_P (type))
      || POINTER_BOUNDS_MODE_P (mode))
    return false;

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      /* See Windows x64 Software Convention.  */
      if (call_abi == MS_ABI)
	{
	  HOST_WIDE_INT msize = GET_MODE_SIZE (mode);

	  if (type)
	    {
	      /* Arrays are passed by reference.  */
	      if (TREE_CODE (type) == ARRAY_TYPE)
		return true;

	      if (RECORD_OR_UNION_TYPE_P (type))
		{
		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		     are passed by reference.  */
		  msize = int_size_in_bytes (type);
		}
	    }

	  /* __m128 is passed by reference.  */
	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
	}
      else if (type && int_size_in_bytes (type) == -1)
	return true;
    }

  return false;
}
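/* Illustrative example (added commentary, not from the original source):
   under the MS ABI a 12-byte struct or a 16-byte __m128 fails the
   "msize is 1, 2, 4 or 8" test above and is therefore passed by
   reference, while the SysV ABI passes the same types in registers or
   on the stack and only uses reference passing for variable-size
   types (int_size_in_bytes == -1).  */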
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
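/* Illustrative example (added commentary, not from the original source):
   on 32-bit targets an __m128 argument keeps its 128-bit boundary, a
   long double (XFmode) drops to PARM_BOUNDARY, and whenever the result
   differs from what ix86_compat_function_arg_boundary reports (the
   pre-GCC-4.6 behaviour) the -Wpsabi note above fires once per
   compilation, guarded by the static `warned' flag.  */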
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;
    case DX_REG:
      return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    case DI_REG:
    case SI_REG:
      return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;

    case BND0_REG:
    case BND1_REG:
      return chkp_function_instrumented_p (current_function_decl);

      /* Complex values are returned in %st(0)/%st(1) pair.  */
    case ST0_REG:
    case ST1_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

      /* Complex values are returned in %xmm0/%xmm1 pair.  */
    case XMM0_REG:
    case XMM1_REG:
      return TARGET_SSE;

    case MM0_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled", fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called");
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	/* ... scalar and decimal float modes ... */
	  regno = FIRST_SSE_REG;
	  break;
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
		      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if (valtype != NULL_TREE
	      && !VECTOR_INTEGER_TYPE_P (valtype)
	      && !INTEGRAL_TYPE_P (valtype)
	      && !VECTOR_FLOAT_TYPE_P (valtype))
	    break;
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       machine_mode orig_mode, machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
      || POINTER_BOUNDS_MODE_P (mode))
    return gen_rtx_REG (BNDmode, FIRST_BND_REG);
  else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode, valtype);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
{
  machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL, true);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Return an RTX representing a place where a function returns
   or receives pointer bounds or NULL if no bounds are returned.

   VALTYPE is a data type of a value returned by the function.

   FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
   or FUNCTION_TYPE of the function.

   If OUTGOING is false, return a place in which the caller will
   see the return value.  Otherwise, return a place where a
   function returns a value.  */

static rtx
ix86_function_value_bounds (const_tree valtype,
			    const_tree fntype_or_decl ATTRIBUTE_UNUSED,
			    bool outgoing ATTRIBUTE_UNUSED)
{
  rtx res = NULL_RTX;

  if (BOUNDED_TYPE_P (valtype))
    res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
  else if (chkp_type_has_pointer (valtype))
    {
      bitmap slots;
      rtx bounds[2];
      bitmap_iterator bi;
      unsigned i, bnd_no = 0;

      bitmap_obstack_initialize (NULL);
      slots = BITMAP_ALLOC (NULL);
      chkp_find_bound_slots (valtype, slots);

      EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
	{
	  rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
	  rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
	  gcc_assert (bnd_no < 2);
	  bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
	}

      res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));

      BITMAP_FREE (slots);
      bitmap_obstack_release (NULL);
    }

  return res;
}
/* Pointer function arguments and return values are promoted to
   word_mode for normal functions.  */

static machine_mode
ix86_promote_function_mode (const_tree type, machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (cfun->machine->func_type == TYPE_NORMAL
      && type != NULL_TREE
      && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}

static rtx
ix86_libcall_value (machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (POINTER_BOUNDS_TYPE_P (type))
    return false;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  int needed_intregs, needed_sseregs;

	  return examine_argument (mode, type, 1,
				   &needed_intregs, &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */
	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F;
	}

      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
#endif
}
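/* Illustrative example (added commentary, not from the original source):
   on 32-bit Linux a struct { int a, b, c; } (12 bytes, BLKmode) is
   returned in memory via the hidden return pointer, while a 16-byte
   __m128 comes back in %xmm0 when SSE is enabled and falls back to
   memory (the "return !TARGET_SSE" path above) otherwise.  */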
/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  if (TARGET_64BIT)
    {
      /* Initialize ABI specific va_list builtin types.

	 In lto1, we can encounter two va_list types:
	 - one as a result of the type-merge across TUs, and
	 - the one constructed here.
	 These two types will not have the same TYPE_MAIN_VARIANT, and
	 therefore a type identity check in canonical_va_list_type based on
	 TYPE_MAIN_VARIANT (which we used to have) will not work.
	 Instead, we tag each va_list_type_node with its unique attribute, and
	 look for the attribute in the type identity check in
	 canonical_va_list_type.

	 Tagging sysv_va_list_type_node directly with the attribute is
	 problematic since it's an array of one record, which will degrade
	 into a pointer to record when used as parameter (see build_va_arg
	 comments for an example), dropping the attribute in the process.  So
	 we tag the record instead.  */

      /* For SYSV_ABI we use an array of one record.  */
      sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();

      /* For MS_ABI we use plain pointer to argument area.  */
      tree char_ptr_type = build_pointer_type (char_type_node);
      tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
			     TYPE_ATTRIBUTES (char_ptr_type));
      ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);

      return ((ix86_abi == MS_ABI)
	      ? ms_va_list_type_node
	      : sysv_va_list_type_node);
    }
  else
    {
      /* For i386 we use plain pointer to argument area.  */
      return build_pointer_type (char_type_node);
    }
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
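/* Added commentary on the save-area layout assumed above (not from the
   original source): the prologue spills up to X86_64_REGPARM_MAX (6)
   GPRs at offsets 0, 8, ..., 40 from the frame pointer, then up to
   X86_64_SSE_REGPARM_MAX (8) XMM registers in 16-byte slots starting
   at ix86_varargs_gpr_size.  The fp_offset field that ix86_va_start
   below initializes to n_fpr * 16 + 8 * X86_64_REGPARM_MAX indexes
   into exactly this block.  */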
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
			     tree type, int *, int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
static void
ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
				   machine_mode mode,
				   tree type,
				   int *pretend_size ATTRIBUTE_UNUSED,
				   int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;
  rtx save_area;
  int bnd_reg, i, max;

  gcc_assert (!no_rtl);

  /* Do nothing if we use plain pointer to argument area.  */
  if (!TARGET_64BIT || cum->call_abi == MS_ABI)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);
  save_area = frame_pointer_rtx;

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
  if (chkp_function_instrumented_p (current_function_decl))
    for (i = cum->regno; i < max; i++)
      {
	rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
	rtx ptr = gen_rtx_REG (Pmode,
			       x86_64_int_parameter_registers[i]);
	rtx bounds;

	if (bnd_reg <= LAST_BND_REG)
	  bounds = gen_rtx_REG (BNDmode, bnd_reg);
	else
	  {
	    rtx ldx_addr
	      = plus_constant (Pmode, arg_pointer_rtx,
			       (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
	    bounds = gen_reg_rtx (BNDmode);
	    emit_insn (BNDmode == BND64mode
		       ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
		       : gen_bnd32_ldx (bounds, ldx_addr, ptr));
	  }

	emit_insn (BNDmode == BND64mode
		   ? gen_bnd64_stx (addr, ptr, bounds)
		   : gen_bnd32_stx (addr, ptr, bounds));

	bnd_reg++;
      }
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);

	  /* Store zero bounds for va_list.  */
	  if (chkp_function_instrumented_p (current_function_decl))
	    chkp_expand_bounds_reset_for_mem (valist,
					      make_tree (TREE_TYPE (valist),
							 next));
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8 * X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  /* Store zero bounds for overflow area pointer.  */
  if (chkp_function_instrumented_p (current_function_decl))
    chkp_expand_bounds_reset_for_mem (ovf, t);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The prologue of the function saves it right above the stack
	 frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      /* Store zero bounds for save area pointer.  */
      if (chkp_function_instrumented_p (current_function_decl))
	chkp_expand_bounds_reset_for_mem (sav, t);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, false);
  switch (nat_mode)
    {
    /* ... case labels for the 256- and 512-bit AVX vector modes ... */
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (nbits, 1).exists (&mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
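/* Added illustration (assumption; simplified from the sequence the
   code above emits): for va_arg (ap, int) the generated GIMPLE is
   roughly

       if (ap.gp_offset >= 48) goto lab_false;	// 6 GPRs * 8 bytes
       addr = ap.reg_save_area + ap.gp_offset;
       ap.gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap.overflow_arg_area;		// aligned first if needed
       ap.overflow_arg_area += 8;
     lab_over:
       result = *(int *) addr;
  */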
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (SUBREG_P (mem))
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Return false if INSN contains a MEM with a non-default address space.  */
bool
ix86_check_no_addr_space (rtx insn)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
	return false;
    }
  return true;
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char *cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  machine_mode mode = GET_MODE (x);

  const REAL_VALUE_TYPE *r;

  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  r = CONST_DOUBLE_REAL_VALUE (x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (r))
    return 8;
  if (real_identical (r, &dconstm1))
    return 9;

  return 0;
}
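/* Added summary of the return values above (assumption, cross-checked
   against standard_80387_constant_opcode below): 1 -> fldz, 2 -> fld1,
   3..7 -> fldlg2/fldln2/fldl2e/fldl2t/fldpi (i + 3 indexes the table),
   8 and 9 -> the two-insn fldz;fchs and fld1;fchs splits, 0 -> no
   special instruction, -1 -> not an x87 CONST_DOUBLE at all.  */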
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return const_double_from_real_value (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if X is all bits 0 and 2 if X is all bits 1
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  if (x == constm1_rtx || vector_all_ones_operand (x, mode))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
	mode = pred_mode;

      switch (GET_MODE_SIZE (mode))
	{
	case 64:
	  if (TARGET_AVX512F)
	    return 2;
	  break;
	case 32:
	  if (TARGET_AVX2)
	    return 2;
	  break;
	case 16:
	  if (TARGET_SSE2)
	    return 2;
	  break;
	case 0:
	  /* VOIDmode */
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0";
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    return (TARGET_AVX512VL
		    ? "vpxord\t%x0, %x0, %x0"
		    : "vpxord\t%g0, %g0, %g0");
	  return "vpxor\t%x0, %x0, %x0";

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "vxorpd\t%x0, %x0, %x0";
	  else if (TARGET_AVX512DQ)
	    return (TARGET_AVX512VL
		    ? "vxorpd\t%x0, %x0, %x0"
		    : "vxorpd\t%g0, %g0, %g0");
	  else
	    return (TARGET_AVX512VL
		    ? "vpxorq\t%x0, %x0, %x0"
		    : "vpxorq\t%g0, %g0, %g0");

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "vxorps\t%x0, %x0, %x0";
	  else if (TARGET_AVX512DQ)
	    return (TARGET_AVX512VL
		    ? "vxorps\t%x0, %x0, %x0"
		    : "vxorps\t%g0, %g0, %g0");
	  else
	    return (TARGET_AVX512VL
		    ? "vpxord\t%x0, %x0, %x0"
		    : "vpxord\t%g0, %g0, %g0");

	default:
	  gcc_unreachable ();
	}
    }
  else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F);
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return (TARGET_AVX
		    ? "vpcmpeqd\t%0, %0, %0"
		    : "pcmpeqd\t%0, %0");
	  else if (TARGET_AVX512VL)
	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
	  else
	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	default:
	  gcc_unreachable ();
	}
    }

  gcc_unreachable ();
}
/* Returns true if INSN can be transformed from a memory load
   to a supported FP constant load.  */

bool
ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
{
  rtx src = find_constant_src (insn);

  gcc_assert (REG_P (dst));

  if (src == NULL
      || (SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
      || (STACK_REGNO_P (REGNO (dst))
	  && standard_80387_constant_p (src) < 1))
    return false;

  return true;
}
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}
      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
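
/* For example, (plus:SI (symbol_ref:SI ("foo")) (const_int 4)) mentions
   a symbolic reference, while (plus:SI (reg:SI 0) (const_int 4)) does
   not.  */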
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  if (ix86_function_naked (current_function_decl))
    return false;

  /* Don't use `ret' instruction in interrupt handler.  */
  if (! reload_completed
      || frame_pointer_needed
      || cfun->machine->func_type != TYPE_NORMAL)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  struct ix86_frame &frame = cfun->machine->frame;
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires a valid frame pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame pointer as the maximum
     stack allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* SSE saves require a frame pointer when the stack is misaligned.  */
  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
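
/* For reference, the thunk body emitted by ix86_code_end below is
   roughly (for %ebx):

	__x86.get_pc_thunk.bx:
		mov	(%esp), %ebx
		ret

   i.e. it copies the caller's return address, which is the address of
   the instruction following the call, into the destination register.  */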
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("%!ret", NULL);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
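
/* For the usual ELF -fpic case the sequence emitted above is, e.g. for
   %ebx (a sketch; the exact relocations are the assembler's business):

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the special _GLOBAL_OFFSET_TABLE_ operand resolves to the
   distance from the add itself to the GOT, yielding the GOT base.  */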
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}
/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
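
/* For illustration, on a 32-bit target gen_push (reg) builds
   (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ...)) and
   gen_pop (reg) builds
   (set (reg:SI ...) (mem:SI (post_inc:SI (reg:SI sp)))),
   which match the push and pop instruction patterns.  */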
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (ix86_use_pseudo_pic_reg ())
    return INVALID_REGNUM;

  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return true if REGNO is used by the epilogue.  */

static bool
ix86_epilogue_uses (int regno)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  return (epilogue_completed
	  && cfun->machine->no_caller_saved_registers
	  && !fixed_regs[regno]
	  && !STACK_REGNO_P (regno)
	  && !MMX_REGNO_P (regno));
}
/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
ix86_hard_regno_scratch_ok (unsigned int regno)
{
  /* If there are no caller-saved registers, we can't use any register
     as a scratch register after epilogue and use REGNO as scratch
     register only if it has been used before to avoid saving and
     restoring it.  */
  return (!cfun->machine->no_caller_saved_registers
	  || (!epilogue_completed
	      && df_regs_ever_live_p (regno)));
}
/* Return true if register class CL should be an additional allocno
   class.  */

static bool
ix86_additional_allocno_class_p (reg_class_t cl)
{
  return cl == MOD4_SSE_REGS;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  if (cfun->machine->no_caller_saved_registers)
    {
      /* Don't preserve registers used for function return value.  */
      rtx reg = crtl->return_rtx;
      if (reg)
	{
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;

	  reg = crtl->return_bnd;
	  if (reg)
	    {
	      i = REGNO (reg);
	      nregs = REG_NREGS (reg);
	      while (nregs-- > 0)
		if ((i + nregs) == regno)
		  return false;
	    }
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));
    }

  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	     _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM
	      || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      nregs++;
  return nregs;
}
/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      nregs++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame &frame = cfun->machine->frame;

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
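
/* Worked example with made-up numbers: if frame.stack_pointer_offset
   is 48 and frame.frame_pointer_offset is 16, eliminating the frame
   pointer to the stack pointer yields 48 - 16 = 32, so a reference to
   fp + N is rewritten as sp + (N + 32).  */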
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
/* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
static void warn_once_call_ms2sysv_xlogues (const char *feature)
{
  static bool warned_once = false;
  if (!warned_once)
    {
      warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
	       feature);
      warned_once = true;
    }
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      if (crtl->calls_eh_return)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
	}
      else if (ix86_static_chain_on_stack)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("static call chains");
	}
      /* Finally, compute which registers the stub will manage.  */
      else
	{
	  unsigned count = xlogue_layout::count_stub_managed_regs ();
	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
	  m->call_ms2sysv_pad_in = 0;
	}
    }

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at command line or via
     force_align_arg_pointer attribute.  */
  if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor
	  || ix86_incoming_stack_boundary < 128))
    {
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
  gcc_assert (TARGET_64BIT || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert (!frame->nsseregs);
    }

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    m->use_fast_prologue_epilogue = false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When a function
	 takes many instructions to execute we may use the slow version, as
	 well as when the function is known to be outside a hot spot (this
	 is known with feedback only).  Weight the size of the function by
	 the number of registers to save, as it is cheap to use one or two
	 push instructions but very slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	m->use_fast_prologue_epilogue = false;
      else
	m->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
	  the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

  if (stack_realign_fp)
    {
      /* We may need a 16-byte aligned stack for the remainder of the
	 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX/2/512.  In order
	 to avoid wasting space, we first calculate the space needed for
	 the rest of the register saves, add that to the stack pointer,
	 and then realign the stack to the boundary of the start of the
	 frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
	{
	  if (m->call_ms2sysv)
	    {
	      m->call_ms2sysv_pad_in = 0;
	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
	    }
	  else if (frame->nsseregs)
	    /* The only ABI that has saved SSE registers (Win64) also has a
	       16-byte aligned default stack.  However, many programs violate
	       the ABI, and Wine64 forces stack realignment to compensate.  */
	    space_needed = frame->nsseregs * 16;

	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
	     rounding to be pedantic.  */
	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
	}
      else
	space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
	 before this point are not directly comparable with values below
	 this point.  Use sp_valid_at to determine if the stack pointer is
	 valid for a given offset, fp_valid_at for the frame pointer, or
	 choose_baseaddr to have a base register chosen for you.

	 Note that the result of (frame->stack_realign_offset
	 & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
				   + sse_reg_space_needed;
    }
  else
    {
      frame->stack_realign_offset = offset;

      if (TARGET_64BIT && m->call_ms2sysv)
	{
	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
	}
      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
	{
	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
	     required and the DRAP re-alignment boundary is at least 16 bytes,
	     then we want the SSE register save area properly aligned.  */
	  if (ix86_incoming_stack_boundary >= 128
	      || (stack_realign_drap && stack_alignment_needed >= 16))
	    offset = ROUND_UP (offset, 16);
	  offset += frame->nsseregs * 16;
	}

      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }

  /* Align start of frame for local function.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
	 the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
	  && (diff > 240 || (diff & 15) != 0)
	  && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
    }
}
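
/* A sketch of the resulting layout, from higher to lower addresses
   (offsets measured from the CFA; realignment and padding omitted):

	return address		<- INCOMING_FRAME_SP_OFFSET
	[pushed static chain]
	[saved frame pointer]	<- hfp_save_offset, hard_frame_pointer_offset
	saved GP registers	<- reg_save_offset
	saved SSE registers	<- sse_reg_save_offset
	va_arg save area
	local stack frame	<- frame_pointer_offset
	outgoing arguments	<- stack_pointer_offset at the end

   This mirrors the order in which OFFSET is advanced above.  */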
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
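
/* Example of the metric: 8(%ebp) encodes with a disp8, so the result
   is 1; 8(%esp) additionally needs a SIB byte, so 2; 512(%ebp) needs a
   disp32, so 4.  Smaller results mean shorter address encodings.  */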
/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */

static bool
sp_valid_at (HOST_WIDE_INT cfa_offset)
{
  const struct machine_frame_state &fs = cfun->machine->fs;
  if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
    {
      /* Validate that the cfa_offset isn't in a "no-man's land".  */
      gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
      return false;
    }
  return fs.sp_valid;
}
/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */

static bool
fp_valid_at (HOST_WIDE_INT cfa_offset)
{
  const struct machine_frame_state &fs = cfun->machine->fs;
  if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
    {
      /* Validate that the cfa_offset isn't in a "no-man's land".  */
      gcc_assert (cfa_offset >= fs.sp_realigned_offset);
      return false;
    }
  return fs.fp_valid;
}
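
/* In other words, once the stack pointer has been realigned, CFA
   offsets up to fs.sp_realigned_offset are only addressable through
   the frame pointer, and offsets beyond fs.sp_realigned_fp_last only
   through the stack pointer.  The asserts above verify that no offset
   lands outside both ranges (a "no-man's land").  */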
/* Choose a base register based upon alignment requested, speed and/or
   size.  */

static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_requested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  bool hfp_ok  = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok   = sp_valid_at (cfa_offset);

  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_requested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_requested;
      drap_ok = drap_ok && drap_align >= align_requested;
      sp_ok = sp_ok && sp_align >= align_requested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	    }
	}
    }

  /* Set the align return value.  */
  if (align)
    {
      if (base_reg == stack_pointer_rtx)
	*align = sp_align;
      else if (base_reg == crtl->drap_reg)
	*align = drap_align;
      else if (base_reg == hard_frame_pointer_rtx)
	*align = hfp_align;
    }
}
/* Return an RTX that points to CFA_OFFSET within the stack frame and
   the alignment of the address.  If ALIGN is non-null, it should point
   to an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
   alignment value.  If it is possible for the base register offset to be
   non-immediate then SCRATCH_REGNO should specify a scratch register to
   use.

   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
		 unsigned int scratch_regno = INVALID_REGNUM)
{
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  /* If a specific alignment is requested, try to get a base register
     with that alignment first.  */
  if (align && *align)
    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);

  if (!base_reg)
    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);

  gcc_assert (base_reg != NULL);

  rtx base_offset_rtx = GEN_INT (base_offset);

  if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
    {
      gcc_assert (scratch_regno != INVALID_REGNUM);

      rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      emit_move_insn (scratch_reg, base_offset_rtx);

      return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
    }

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx_insn *insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  addr = choose_baseaddr (cfa_offset, &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}
/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above, or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }

  return insn;
}
/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   longer encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  /* Always use callee-saved register if there are no caller-saved
     registers.  */
  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a
	 static chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || cfun->machine->no_caller_saved_registers
	  || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a
	 static chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || cfun->machine->no_caller_saved_registers
	  || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Handle a "force_align_arg_pointer" attribute.  */

static tree
ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
					       tree, int, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
  if (cfun->machine->func_type != TYPE_NORMAL)
    incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
  /* Prefer the one specified at command line.  */
  else if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;

  /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
  if (ix86_tls_descriptor_calls_expanded_in_cfun
      && crtl->preferred_stack_boundary < 128)
    crtl->preferred_stack_boundary = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || (cfun->machine->outgoing_args_on_stack
	  && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};
/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true, false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true, false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true, false))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      struct machine_function *m = cfun->machine;
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
      m->fs.sp_offset -= UNITS_PER_WORD;
    }
}
/* Return the probing interval for -fstack-clash-protection.  */

static HOST_WIDE_INT
get_probe_interval (void)
{
  if (flag_stack_clash_protection)
    return (HOST_WIDE_INT_1U
	    << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
  else
    return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
}
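
/* With the usual probe interval exponent of 12 this is 4096 bytes,
   i.e. one probe per 4 KiB page; the actual default is target- and
   parameter-defined, so treat that number as illustrative.  */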
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.

   This differs from the next routine in that it tries hard to prevent
   attacks that jump the stack guard.  Thus it is never allowed to allocate
   more than PROBE_INTERVAL bytes of stack space without a suitable
   probe.  */

static void
ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn = emit_insn (gen_push (dummy_reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_emit_restore_reg_using_pop (dummy_reg);
      emit_insn (gen_blockage ());
    }

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  HOST_WIDE_INT probe_interval = get_probe_interval ();
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual.  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (residual), -1,
				   m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      struct scratch_reg sr;
      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
							 size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (rounded_size - size), -1,
				   m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
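
/* The code emitted above for a moderate frame boils down to (64-bit
   sketch, assuming a 4 KiB probe interval):

	sub	$4096, %rsp
	orq	$0, (%rsp)	# probe; touches the new page
	sub	$4096, %rsp
	orq	$0, (%rsp)
	sub	$residual, %rsp	# allocate the rest, no probe needed

   so the stack pointer never advances more than one guard interval
   past the last probed location.  */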
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 9 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 4 * get_probe_interval ())
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = get_probe_interval (); i < size; i += get_probe_interval ())
	{
	  if (first_probe)
	    {
	      adjust = 2 * get_probe_interval () + dope;
	      first_probe = false;
	    }
	  else
	    adjust = get_probe_interval ();

	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + get_probe_interval () + dope;
      else
	adjust = size + get_probe_interval () - i;

      emit_insn (gen_rtx_SET (stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    (get_probe_interval ()
						     + dope))));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, get_probe_interval ());

      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     - (get_probe_interval () + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      if (rounded_size <= (HOST_WIDE_INT_1 << 31))
	emit_insn (gen_rtx_SET (sr.reg,
				plus_constant (Pmode, stack_pointer_rtx,
					       -rounded_size)));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  emit_insn (gen_rtx_SET (sr.reg,
				  gen_rtx_PLUS (Pmode, sr.reg,
						stack_pointer_rtx)));
	}

      /* Step 3: the loop

	 do
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }
	 while (SP != LAST_ADDR)

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));

      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    (get_probe_interval ()
						     + dope))));

      release_scratch_register_on_entry (&sr);
    }

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  {
    rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
    XVECEXP (expr, 0, 0)
      = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -size));
    XVECEXP (expr, 0, 1)
      = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx,
				    get_probe_interval () + dope + size));
    add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
    RTX_FRAME_RELATED_P (last) = 1;
  }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
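/* Illustrative expansion of the small-SIZE case above, assuming a
   4096-byte probe interval, 64-bit words (dope = 32) and SIZE == 8192;
   the constants are examples, not fixed output:

	subq	$8224, %rsp	# 2*interval + dope on the first probe
	orq	$0, (%rsp)
	subq	$4096, %rsp	# remaining interval up to SIZE
	orq	$0, (%rsp)
	addq	$4128, %rsp	# adjust back: interval + dope

   The net SP change is exactly -8192 while every page is touched.  */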
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
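/* With the default 4096-byte probe interval and REG being, say, %r11,
   the routine above prints a loop of this shape (a sketch; the actual
   register comes from the insn operand):

   .LPSRL0:
	sub	$4096, %rsp
	or	$0, (%rsp)
	cmp	%r11, %rsp
	jne	.LPSRL0
*/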
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * get_probe_interval ())
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = get_probe_interval (); i < size; i += get_probe_interval ())
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, get_probe_interval ());

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;

      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
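/* Sketch of the printed loop, assuming REG is %r11 and END is %rax
   (both illustrative; the operands come from the insn pattern):

   .LPSRL1:
	sub	$4096, %r11	# TEST_ADDR += PROBE_INTERVAL
	or	$0, (%rsp,%r11)	# probe at SP + TEST_ADDR
	cmp	%rax, %r11
	jne	.LPSRL1

   The probe address is SP plus the negative offset held in REG, which
   is why only the offset register is updated inside the loop.  */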
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
   will guide prologue/epilogue to be generated in correct form.  */

static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment had
     been spilled and there was no stack access, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if ((stack_realign || !flag_omit_frame_pointer)
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
	   && flag_stack_check
	   && flag_exceptions
	   && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
			   HARD_FRAME_POINTER_REGNUM);

      /* The preferred stack alignment is the minimum stack alignment.  */
      if (stack_alignment > crtl->preferred_stack_boundary)
	stack_alignment = crtl->preferred_stack_boundary;

      bool require_stack_frame = false;

      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;
	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn)
		&& requires_stack_frame_p (insn, prologue_used,
					   set_up_by_prologue))
	      {
		require_stack_frame = true;

		if (stack_realign)
		  {
		    /* Find the maximum stack alignment.  */
		    subrtx_iterator::array_type array;
		    FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
		      if (MEM_P (*iter)
			  && (reg_mentioned_p (stack_pointer_rtx,
					       *iter)
			      || reg_mentioned_p (frame_pointer_rtx,
						  *iter)))
			{
			  unsigned int alignment = MEM_ALIGN (*iter);
			  if (alignment > stack_alignment)
			    stack_alignment = alignment;
			}
		  }
	      }
	}

      if (require_stack_frame)
	{
	  /* Stack frame is required.  If stack alignment needed is less
	     than incoming stack boundary, don't realign stack.  */
	  stack_realign = incoming_stack_boundary < stack_alignment;
	  if (!stack_realign)
	    {
	      crtl->max_used_stack_slot_alignment
		= incoming_stack_boundary;
	      crtl->stack_alignment_needed
		= incoming_stack_boundary;
	      /* Also update preferred_stack_boundary for leaf
		 functions.  */
	      crtl->preferred_stack_boundary
		= incoming_stack_boundary;
	    }
	}
      else
	{
	  /* If drap has been set, but it actually isn't live at the
	     start of the function, there is no reason to set it up.  */
	  if (crtl->drap_reg)
	    {
	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
				     REGNO (crtl->drap_reg)))
		{
		  crtl->drap_reg = NULL_RTX;
		  crtl->need_drap = false;
		}
	    }
	  else
	    cfun->machine->no_drap_save_restore = true;

	  frame_pointer_needed = false;
	  stack_realign = false;
	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
	  crtl->stack_alignment_needed = incoming_stack_boundary;
	  crtl->stack_alignment_estimated = incoming_stack_boundary;
	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	    crtl->preferred_stack_boundary = incoming_stack_boundary;
	  df_finish_pass (true);
	  df_scan_alloc (NULL);
	  df_scan_blocks ();
	  df_compute_regs_ever_live (true);
	  df_analyze ();

	  if (flag_var_tracking)
	    {
	      /* Since frame pointer is no longer available, replace it with
		 stack pointer - UNITS_PER_WORD in debug insns.  */
	      df_ref ref, next;
	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
		   ref; ref = next)
		{
		  next = DF_REF_NEXT_REG (ref);
		  if (!DF_REF_INSN_INFO (ref))
		    continue;

		  /* Make sure the next ref is for a different instruction,
		     so that we're not affected by the rescan.  */
		  rtx_insn *insn = DF_REF_INSN (ref);
		  while (next && DF_REF_INSN (next) == insn)
		    next = DF_REF_NEXT_REG (next);

		  if (DEBUG_INSN_P (insn))
		    {
		      bool changed = false;
		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
			{
			  rtx *loc = DF_REF_LOC (ref);
			  if (*loc == hard_frame_pointer_rtx)
			    {
			      *loc = plus_constant (Pmode,
						    stack_pointer_rtx,
						    -UNITS_PER_WORD);
			      changed = true;
			    }
			}
		      if (changed)
			df_insn_rescan (insn);
		    }
		}
	    }

	  recompute_frame_layout_p = true;
	}
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
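/* Example of the effect of the elimination above: with
   -fno-omit-frame-pointer, a leaf function such as

	int f (int x) { return x + 1; }

   spills nothing, uses no stack slots and satisfies every condition
   tested here, so frame_pointer_needed is cleared and the function is
   emitted without a prologue.  (Illustrative example only.)  */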
/* Delete SET_GOT right after entry block if it is allocated to reg.  */

static void
ix86_elim_entry_set_got (rtx reg)
{
  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
  rtx_insn *c_insn = BB_HEAD (bb);
  if (!NONDEBUG_INSN_P (c_insn))
    c_insn = next_nonnote_nondebug_insn (c_insn);
  if (c_insn && NONJUMP_INSN_P (c_insn))
    {
      rtx pat = PATTERN (c_insn);
      if (GET_CODE (pat) == PARALLEL)
	{
	  rtx vec = XVECEXP (pat, 0, 0);
	  if (GET_CODE (vec) == SET
	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
	    delete_insn (c_insn);
	}
    }
}
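/* The insn this routine looks for has roughly the following RTL shape
   (a sketch; the exact clobbers depend on the set_got pattern):

	(parallel [(set (reg:SI bx)
			(unspec:SI [...] UNSPEC_SET_GOT))
		   (clobber (reg:CC flags))])

   Only the first vector element is inspected, which is why
   XVECEXP (pat, 0, 0) suffices above.  */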
static rtx
gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
{
  rtx addr, mem;

  if (offset)
    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
}

static rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, false);
}

static rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, true);
}
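/* Usage sketch for the helpers above: storing %r12 at offset 16 from a
   base register RAX would be written

	RTVEC_ELT (v, vi++)
	  = gen_frame_store (gen_rtx_REG (DImode, R12_REG), rax, 16);

   yielding (set (mem/c:DI (plus:DI (reg:DI ax) (const_int 16)))
		 (reg:DI r12)).  The OFFSET == 0 case avoids creating a
   degenerate (plus (reg) (const_int 0)) address.  */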
static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset rather
     than the CFA offset so the address is correct regardless of whether
     we've actually realigned the stack or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
						  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    }

  gcc_assert (vi == (unsigned) GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
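/* The PARALLEL built above has roughly this shape (illustrative; the
   stub name and register list depend on the xlogue layout):

	(parallel [(use (symbol_ref "__savms64_14"))
		   (set (mem:DI (plus:DI (reg:DI ax) (const_int -48)))
			(reg:DI r15))
		   ...])

   The USE keeps the out-of-line stub symbol live, while the SETs
   describe, for unwind purposes, the stores the stub performs.  */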
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;
  bool save_stub_call_needed;
  rtx static_chain = NULL_RTX;

  if (ix86_function_naked (current_function_decl))
    return;

  ix86_finalize_stack_frame_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;

  frame = m->frame;

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
	 prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      static_chain = ix86_static_chain (cfun->decl, false);
      insn = emit_insn (gen_push (static_chain));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     of DRAP is needed and stack realignment is really needed after reload */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Can't use DRAP in interrupt function.  */
      if (cfun->machine->func_type != TYPE_NORMAL)
	sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
	       "in interrupt service routine.  This may be worked "
	       "around by avoiding functions with aggregate return.");

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;

      if (static_chain)
	{
	  /* Replicate static chain on the stack so that static chain
	     can be reached via (argp - 2) slot.  This is needed for
	     nested function with stack realignment.  */
	  insn = emit_insn (gen_push (static_chain));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);
  save_stub_call_needed = (m->call_ms2sysv);
  gcc_assert (sse_registers_saved || !save_stub_call_needed);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb didn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
	 on SEH target.  */
      if (!int_registers_saved
	  && TARGET_SEH
	  && !frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location. */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* Record last valid frame pointer offset.  */
      m->fs.sp_realigned_fp_last = frame.reg_save_offset;

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      allocate = frame.reg_save_offset - m->fs.sp_offset
		 + frame.stack_realign_allocate;
      if (allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-allocate), -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
      m->fs.sp_realigned_offset = m->fs.sp_offset
				  - frame.stack_realign_allocate;
      /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
	 Beyond this point, stack access should be done via choose_baseaddr or
	 by using sp_valid_at and fp_valid_at to determine the correct base
	 register.  Henceforth, any CFA offset should be thought of as logical
	 and not physical.  */
      gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
      m->fs.sp_realigned = true;

      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
	 is needed to describe where a register is saved using a realigned
	 stack pointer, so we need to invalidate the stack pointer for that
	 target.  */
      if (TARGET_SEH)
	m->fs.sp_valid = false;

      /* If SP offset is non-immediate after allocation of the stack frame,
	 then emit SSE saves or stub call prior to allocating the rest of the
	 stack frame.  This is less efficient for the out-of-line stub because
	 we can't combine allocations across the call barrier, but it's better
	 than using a scratch register.  */
      else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
						   - m->fs.sp_realigned_offset),
					  Pmode))
	{
	  if (!sse_registers_saved)
	    {
	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
	      sse_registers_saved = true;
	    }
	  else if (save_stub_call_needed)
	    {
	      ix86_emit_outlined_ms2sysv_save (frame);
	      save_stub_call_needed = false;
	    }
	}
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size =
	frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
	 written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection))
    {
      /* This assert wants to verify that integer registers were saved
	 prior to probing.  This is necessary when probing may be implemented
	 as a function call (Windows).  It is not necessary for stack clash
	 protection probing.  */
      if (!flag_stack_clash_protection)
	gcc_assert (int_registers_saved);

      if (flag_stack_clash_protection)
	{
	  ix86_adjust_stack_and_probe_stack_clash (allocate);
	  allocate = 0;
	}
      else if (STACK_CHECK_MOVING_SP)
	{
	  if (!(crtl->is_leaf && !cfun->calls_alloca
		&& allocate <= get_probe_interval ()))
	    {
	      ix86_adjust_stack_and_probe (allocate);
	      allocate = 0;
	    }
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
	    size = 0x80000000 - get_stack_check_protect () - 1;

	  if (TARGET_STACK_PROBE)
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > get_probe_interval ())
		    ix86_emit_probe_stack_range (0, size);
		}
	      else
		ix86_emit_probe_stack_range (0,
					     size + get_stack_check_protect ());
	    }
	  else
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > get_probe_interval ()
		      && size > get_stack_check_protect ())
		    ix86_emit_probe_stack_range (get_stack_check_protect (),
						 size - get_stack_check_protect ());
		}
	      else
		ix86_emit_probe_stack_range (get_stack_check_protect (), size);
	    }
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = ix86_eax_live_at_start_p ();
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);

      if (eax_live)
	{
	  insn = emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	  /* Note that SEH directives need to continue tracking the stack
	     pointer even after the frame pointer has been set up.  */
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
	{
	  if (sp_is_cfa_reg)
	    m->fs.cfa_offset += allocate;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      /* Use stack_pointer_rtx for relative addressing so that code
	 works for realigned stack, too.  */
      if (r10_live && eax_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
  else if (save_stub_call_needed)
    ix86_emit_outlined_ms2sysv_save (frame);

  /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
     in PROLOGUE.  */
  if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    {
      rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
      insn = emit_insn (gen_set_got (pic));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
      emit_insn (gen_prologue_use (pic));
      /* Deleting already emitted SET_GOT if it exists and is allocated to
	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
      ix86_elim_entry_set_got (pic);
    }

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is setup but after reload it turns out stack realign
	 isn't necessary, here we will emit prologue to setup DRAP
	 without stack realign adjustment */
      t = choose_baseaddr (0, NULL);
      emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
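/* For orientation, a typical frame-pointer prologue produced by this
   function looks like (sketch only; the actual sequence depends on the
   frame layout and target options):

	push	%rbp
	mov	%rsp, %rbp
	push	%rbx		# saved integer registers, if any
	sub	$N, %rsp	# allocate the local frame

   with CFA notes attached so the unwinder can describe every
   intermediate state of the frame.  */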
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
13565 ix86_emit_restore_regs_using_pop (void)
13567 unsigned int regno
;
13569 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
13570 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, false, true))
13571 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   omits the emit and only attaches the notes.  */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;
  if (!insn)
    insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
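/* LEAVE is architecturally equivalent to

	mov	%rbp, %rsp
	pop	%rbp

   which is why the state update above revalidates SP at fp_offset
   minus one word and invalidates the frame pointer in one step.  */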
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx mem;
	rtx_insn *insn;

	mem = choose_baseaddr (cfa_offset, NULL);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;
	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

	mem = choose_baseaddr (cfa_offset, &align);
	mem = gen_rtx_MEM (V4SFmode, mem);

	/* The location alignment depends upon the base register.  */
	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
	set_mem_align (mem, align);
	emit_insn (gen_rtx_SET (reg, mem));

	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, rsi, r.offset);

      /* Save RSI frame load insn & note to add last.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
	}
    }

  /* Add RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
			     rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
	ix86_emit_leave (insn);
      else
	{
	  /* Need CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (dealloc), style,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
}
13835 /* Restore function stack, frame, and registers. */
13838 ix86_expand_epilogue (int style
)
13840 struct machine_function
*m
= cfun
->machine
;
13841 struct machine_frame_state frame_state_save
= m
->fs
;
13842 struct ix86_frame frame
;
13843 bool restore_regs_via_mov
;
13845 bool restore_stub_is_tail
= false;
13847 if (ix86_function_naked (current_function_decl
))
13849 /* The program should not reach this point. */
13850 emit_insn (gen_ud2 ());
13854 ix86_finalize_stack_frame_flags ();
13857 m
->fs
.sp_realigned
= stack_realign_fp
;
13858 m
->fs
.sp_valid
= stack_realign_fp
13859 || !frame_pointer_needed
13860 || crtl
->sp_is_unchanging
;
13861 gcc_assert (!m
->fs
.sp_valid
13862 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
13864 /* The FP must be valid if the frame pointer is present. */
13865 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
13866 gcc_assert (!m
->fs
.fp_valid
13867 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
13869 /* We must have *some* valid pointer to the stack frame. */
13870 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
13872 /* The DRAP is never valid at this point. */
13873 gcc_assert (!m
->fs
.drap_valid
);
13875 /* See the comment about red zone and frame
13876 pointer usage in ix86_expand_prologue. */
13877 if (frame_pointer_needed
&& frame
.red_zone_size
)
13878 emit_insn (gen_memory_blockage ());
13880 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
13881 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
13883 /* Determine the CFA offset of the end of the red-zone. */
13884 m
->fs
.red_zone_offset
= 0;
13885 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
13887 /* The red-zone begins below return address and error code in
13888 exception handler. */
13889 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ INCOMING_FRAME_SP_OFFSET
;
13891 /* When the register save area is in the aligned portion of
13892 the stack, determine the maximum runtime displacement that
13893 matches up with the aligned frame. */
13894 if (stack_realign_drap
)
13895 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
13899 /* Special care must be taken for the normal return case of a function
13900 using eh_return: the eax and edx registers are marked as saved, but
13901 not restored along this path. Adjust the save location to match. */
13902 if (crtl
->calls_eh_return
&& style
!= 2)
13903 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
13905 /* EH_RETURN requires the use of moves to function properly. */
13906 if (crtl
->calls_eh_return
)
13907 restore_regs_via_mov
= true;
13908 /* SEH requires the use of pops to identify the epilogue. */
13909 else if (TARGET_SEH
)
13910 restore_regs_via_mov
= false;
13911 /* If we're only restoring one register and sp cannot be used then
13912 using a move instruction to restore the register since it's
13913 less work than reloading sp and popping the register. */
13914 else if (!sp_valid_at (frame
.hfp_save_offset
) && frame
.nregs
<= 1)
13915 restore_regs_via_mov
= true;
13916 else if (TARGET_EPILOGUE_USING_MOVE
13917 && cfun
->machine
->use_fast_prologue_epilogue
13918 && (frame
.nregs
> 1
13919 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
13920 restore_regs_via_mov
= true;
13921 else if (frame_pointer_needed
13923 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
13924 restore_regs_via_mov
= true;
13925 else if (frame_pointer_needed
13926 && TARGET_USE_LEAVE
13927 && cfun
->machine
->use_fast_prologue_epilogue
13928 && frame
.nregs
== 1)
13929 restore_regs_via_mov
= true;
13931 restore_regs_via_mov
= false;
13933 if (restore_regs_via_mov
|| frame
.nsseregs
)
13935 /* Ensure that the entire register save area is addressable via
13936 the stack pointer, if we will restore SSE regs via sp. */
13938 && m
->fs
.sp_offset
> 0x7fffffff
13939 && sp_valid_at (frame
.stack_realign_offset
+ 1)
13940 && (frame
.nsseregs
+ frame
.nregs
) != 0)
13942 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
13943 GEN_INT (m
->fs
.sp_offset
13944 - frame
.sse_reg_save_offset
),
13946 m
->fs
.cfa_reg
== stack_pointer_rtx
);
13950 /* If there are any SSE registers to restore, then we have to do it
13951 via moves, since there's obviously no pop for SSE regs. */
13952 if (frame
.nsseregs
)
13953 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
13956 if (m
->call_ms2sysv
)
13958 int pop_incoming_args
= crtl
->args
.pops_args
&& crtl
->args
.size
;
13960 /* We cannot use a tail-call for the stub if:
13961 1. We have to pop incoming args,
13962 2. We have additional int regs to restore, or
13963 3. A sibling call will be the tail-call, or
13964 4. We are emitting an eh_return_internal epilogue.
13966 TODO: Item 4 has not yet tested!
13968 If any of the above are true, we will call the stub rather than
13970 restore_stub_is_tail
= !(pop_incoming_args
|| frame
.nregs
|| style
!= 1);
13971 ix86_emit_outlined_ms2sysv_restore (frame
, !restore_stub_is_tail
, style
);
13974 /* If using out-of-line stub that is a tail-call, then...*/
13975 if (m
->call_ms2sysv
&& restore_stub_is_tail
)
13977 /* TODO: parinoid tests. (remove eventually) */
13978 gcc_assert (m
->fs
.sp_valid
);
13979 gcc_assert (!m
->fs
.sp_realigned
);
13980 gcc_assert (!m
->fs
.fp_valid
);
13981 gcc_assert (!m
->fs
.realigned
);
13982 gcc_assert (m
->fs
.sp_offset
== UNITS_PER_WORD
);
13983 gcc_assert (!crtl
->drap_reg
);
13984 gcc_assert (!frame
.nregs
);
13986 else if (restore_regs_via_mov
)
13991 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
13993 /* eh_return epilogues need %ecx added to the stack pointer. */
13996 rtx sa
= EH_RETURN_STACKADJ_RTX
;
13999 /* %ecx can't be used for both DRAP register and eh_return. */
14000 if (crtl
->drap_reg
)
14001 gcc_assert (REGNO (crtl
->drap_reg
) != CX_REG
);
14003 /* regparm nested functions don't work with eh_return. */
14004 gcc_assert (!ix86_static_chain_on_stack
);
14006 if (frame_pointer_needed
)
14008 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
14009 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
14010 emit_insn (gen_rtx_SET (sa
, t
));
14012 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
14013 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
14015 /* Note that we use SA as a temporary CFA, as the return
14016 address is at the proper place relative to it. We
14017 pretend this happens at the FP restore insn because
14018 prior to this insn the FP would be stored at the wrong
14019 offset relative to SA, and after this insn we have no
14020 other reasonable register to use for the CFA. We don't
14021 bother resetting the CFA to the SP for the duration of
14022 the return insn, unless the control flow instrumentation
14023 is done. In this case the SP is used later and we have
14024 to reset CFA to SP. */
14025 add_reg_note (insn
, REG_CFA_DEF_CFA
,
14026 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
14027 ix86_add_queued_cfa_restore_notes (insn
);
14028 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
14029 RTX_FRAME_RELATED_P (insn
) = 1;
14031 m
->fs
.cfa_reg
= sa
;
14032 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
14033 m
->fs
.fp_valid
= false;
14035 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
14037 flag_cf_protection
);
14041 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
14042 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
14043 insn
= emit_insn (gen_rtx_SET (stack_pointer_rtx
, t
));
14044 ix86_add_queued_cfa_restore_notes (insn
);
14046 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
14047 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
14049 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
14050 add_reg_note (insn
, REG_CFA_DEF_CFA
,
14051 plus_constant (Pmode
, stack_pointer_rtx
,
14053 RTX_FRAME_RELATED_P (insn
) = 1;
14056 m
->fs
.sp_offset
= UNITS_PER_WORD
;
14057 m
->fs
.sp_valid
= true;
14058 m
->fs
.sp_realigned
= false;
14063 /* SEH requires that the function end with (1) a stack adjustment
14064 if necessary, (2) a sequence of pops, and (3) a return or
14065 jump instruction. Prevent insns from the function body from
14066 being scheduled into this sequence. */
14069 /* Prevent a catch region from being adjacent to the standard
14070 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
14071 several other flags that would be interesting to test are
14073 if (flag_non_call_exceptions
)
14074 emit_insn (gen_nops (const1_rtx
));
14076 emit_insn (gen_blockage ());
14079 /* First step is to deallocate the stack frame so that we can
14080 pop the registers. If the stack pointer was realigned, it needs
14081 to be restored now. Also do it on SEH target for very large
14082 frame as the emitted instructions aren't allowed by the ABI
14084 if (!m
->fs
.sp_valid
|| m
->fs
.sp_realigned
14086 && (m
->fs
.sp_offset
- frame
.reg_save_offset
14087 >= SEH_MAX_FRAME_SIZE
)))
14089 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
14090 GEN_INT (m
->fs
.fp_offset
14091 - frame
.reg_save_offset
),
14094 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
14096 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
14097 GEN_INT (m
->fs
.sp_offset
14098 - frame
.reg_save_offset
),
14100 m
->fs
.cfa_reg
== stack_pointer_rtx
);
14103 ix86_emit_restore_regs_using_pop ();
14106 /* If we used a stack pointer and haven't already got rid of it,
14108 if (m
->fs
.fp_valid
)
14110 /* If the stack pointer is valid and pointing at the frame
14111 pointer store address, then we only need a pop. */
14112 if (sp_valid_at (frame
.hfp_save_offset
)
14113 && m
->fs
.sp_offset
== frame
.hfp_save_offset
)
14114 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
14115 /* Leave results in shorter dependency chains on CPUs that are
14116 able to grok it fast. */
14117 else if (TARGET_USE_LEAVE
14118 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun
))
14119 || !cfun
->machine
->use_fast_prologue_epilogue
)
14120 ix86_emit_leave (NULL
);
14123 pro_epilogue_adjust_stack (stack_pointer_rtx
,
14124 hard_frame_pointer_rtx
,
14125 const0_rtx
, style
, !using_drap
);
14126 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
14132 int param_ptr_offset
= UNITS_PER_WORD
;
14135 gcc_assert (stack_realign_drap
);
14137 if (ix86_static_chain_on_stack
)
14138 param_ptr_offset
+= UNITS_PER_WORD
;
14139 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
14140 param_ptr_offset
+= UNITS_PER_WORD
;
14142 insn
= emit_insn (gen_rtx_SET
14143 (stack_pointer_rtx
,
14144 gen_rtx_PLUS (Pmode
,
14146 GEN_INT (-param_ptr_offset
))));
14147 m
->fs
.cfa_reg
= stack_pointer_rtx
;
14148 m
->fs
.cfa_offset
= param_ptr_offset
;
14149 m
->fs
.sp_offset
= param_ptr_offset
;
14150 m
->fs
.realigned
= false;
14152 add_reg_note (insn
, REG_CFA_DEF_CFA
,
14153 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
14154 GEN_INT (param_ptr_offset
)));
14155 RTX_FRAME_RELATED_P (insn
) = 1;
14157 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
14158 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.sp_realigned);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
                               style, true);
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());
  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (cfun->machine->func_type != TYPE_NORMAL)
    emit_jump_insn (gen_interrupt_return ());
  else if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
         address, do explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, CX_REG);
          rtx_insn *insn;

          /* There is no "pascal" calling convention in any 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
          x = gen_rtx_SET (stack_pointer_rtx, x);
          add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
          add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     popc, -1, true);
          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
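  /* For illustration only: "ret $N" takes an imm16, so a callee popping
     POPC < 64K bytes of arguments can return with
         ret  $POPC
     while a larger POPC needs the sequence built above, roughly
         popl %ecx          ; return address
         addl $POPC, %esp   ; pop the arguments
         jmp  *%ecx  */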
  else if (!m->call_ms2sysv || !restore_stub_is_tail)
    {
      /* In case of return from EH a simple return cannot be used
         as a return address will be compared with a shadow stack
         return address.  Use indirect jump instead.  */
      if (style == 2 && flag_cf_protection)
        {
          /* Register used in indirect jump must be in word_mode.  But
             Pmode may not be the same as word_mode for x32.  */
          rtx ecx = gen_rtx_REG (word_mode, CX_REG);
          rtx_insn *insn;

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
          x = gen_rtx_SET (stack_pointer_rtx, x);
          add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
          add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_internal ());
    }
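  /* For illustration only: the -fcf-protection EH path above replaces
     the final "ret" with roughly
         pop  %rcx
         jmp  *%rcx
     so no return address is consumed that could mismatch a shadow-stack
     return address.  */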
  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
         it looks like we might want one, take special action.
         First, collect any sequence of deleted debug labels.  */
      while (insn
             && NOTE_P (insn)
             && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
        {
          /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
             notes only, instead set their CODE_LABEL_NUMBER to -1,
             otherwise there would be code generation differences
             in between -g and -g0.  */
          if (NOTE_P (insn) && NOTE_KIND (insn)
              == NOTE_INSN_DELETED_DEBUG_LABEL)
            deleted_debug_label = insn;
          insn = PREV_INSN (insn);
        }

      /* If we have:
           label:
             barrier
         then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
        insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
        {
          if (LABEL_P (insn)
              || (NOTE_P (insn)
                  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
            /* Trailing label.  */
            fputs ("\tnop\n", file);
          else if (cfun && ! cfun->is_thunk)
            {
              /* See if we have a completely empty function body, skipping
                 the special case of the picbase thunk emitted as asm.  */
              while (insn && ! INSN_P (insn))
                insn = PREV_INSN (insn);
              /* If we don't find any insns, we've got an empty function body;
                 i.e. completely empty - without a return or branch.  This is
                 taken as the case where a function body has been removed
                 because it contains an inline __builtin_unreachable().  GCC
                 declares that reaching __builtin_unreachable() means UB so
                 we're not obliged to do anything special; however, we want
                 non-zero-sized function bodies.  To meet this, and help the
                 user out, let's trap the case.  */
              if (insn == NULL)
                fputs ("\tud2\n", file);
            }
        }
      else if (deleted_debug_label)
        for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
          if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
            CODE_LABEL_NUMBER (insn) = -1;
    }
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
        {
          if (DECL_STATIC_CHAIN (cfun->decl))
            {
              sorry ("-fsplit-stack does not support fastcall with "
                     "nested function");
              return INVALID_REGNUM;
            }
          return AX_REG;
        }
      else if (is_thiscall)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return DX_REG;
          return AX_REG;
        }
      else if (regparm < 3)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return CX_REG;
          else
            {
              if (regparm >= 2)
                {
                  sorry ("-fsplit-stack does not support 2 register "
                         "parameters for a nested function");
                  return INVALID_REGNUM;
                }
              return DX_REG;
            }
        }
      else
        {
          /* FIXME: We could make this work by pushing a register
             around the addition and comparison.  */
          sorry ("-fsplit-stack does not support 3 register parameters");
          return INVALID_REGNUM;
        }
    }
}
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Return location of the stack guard value in the TLS block.  */

rtx
ix86_split_stack_guard (void)
{
  int offset;
  addr_space_t as = DEFAULT_TLS_SEG_REG;
  rtx r;

  gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
  offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
  gcc_unreachable ();
#endif

  r = GEN_INT (offset);
  r = gen_const_mem (Pmode, r);
  set_mem_addr_space (r, as);
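
  return r;
}

/* For illustration only: combined with the comparison emitted in
   ix86_expand_split_stack_prologue below, the guard read above yields
   roughly
       cmp %fs:OFFSET, %rsp     (64-bit; %gs on 32-bit Linux)
   where OFFSET is TARGET_THREAD_SPLIT_STACK_OFFSET.  */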
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
         the required frame size.  Since this is the very start of the
         function, the scratch register can be any caller-saved
         register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
        return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
        {
          /* We don't use ix86_gen_add3 in this case because it will
             want to split to lea, but when not optimizing the insn
             will not be split after this point.  */
          emit_insn (gen_rtx_SET (scratch_reg,
                                  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                                offset)));
        }
      else
        {
          emit_move_insn (scratch_reg, offset);
          emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
                                    stack_pointer_rtx));
        }
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
         Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
        {
          rtx rax;

          rax = gen_rtx_REG (word_mode, AX_REG);
          emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
          use_reg (&call_fusage, rax);
        }

      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
          && !TARGET_PECOFF)
        {
          HOST_WIDE_INT argval;

          gcc_assert (Pmode == DImode);
          /* When using the large model we need to load the address
             into a register, and we've run out of registers.  So we
             switch to a different calling convention, and we call a
             different function: __morestack_large.  We pass the
             argument size in the upper 32 bits of r10 and pass the
             frame size in the lower 32 bits.  */
          gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
          gcc_assert ((args_size & 0xffffffff) == args_size);

          if (split_stack_fn_large == NULL_RTX)
            {
              split_stack_fn_large =
                gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
              SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
            }
          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx_code_label *label;
              rtx x;

              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              emit_insn (gen_set_rip_rex64 (reg10, label));
              emit_insn (gen_set_got_offset_rex64 (reg11, label));
              emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
              x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
                                  UNSPEC_GOT);
              x = gen_rtx_CONST (Pmode, x);
              emit_move_insn (reg11, x);
              x = gen_rtx_PLUS (Pmode, reg10, reg11);
              x = gen_const_mem (Pmode, x);
              emit_move_insn (reg11, x);
            }
          else
            emit_move_insn (reg11, split_stack_fn_large);

          fn = reg11;

          argval = ((args_size << 16) << 16) + allocate;
          emit_move_insn (reg10, GEN_INT (argval));
        }
      else
        {
          emit_move_insn (reg10, allocate_rtx);
          emit_move_insn (reg11, GEN_INT (args_size));
          use_reg (&call_fusage, reg11);
        }

      use_reg (&call_fusage, reg10);
    }
  else
    {
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
                                GEN_INT (UNITS_PER_WORD), constm1_rtx,
                                pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (as_a <rtx_insn *> (call_insn));

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
         fp -> old fp value
               return address within this function
               return address of caller of this function
               stack arguments
         So we add three words to get to the stack arguments.

         32-bit:
         fp -> old fp value
               return address within this function
               first argument to __morestack
               second argument to __morestack
               return address of caller of this function
               stack arguments
         So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
                              gen_rtx_PLUS (Pmode, frame_reg,
                                            GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
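/* For illustration only, the 32-bit shape of the code emitted above:
       cmpl  %gs:OFFSET, %esp    ; or lea -FRAME(%esp) into a scratch first
       jae   .Lenough
       pushl $args_size
       pushl $frame_size
       call  __morestack
       ret                       ; the split_stack_return unspec
   .Lenough:  */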
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  addr_space_t seg = ADDR_SPACE_GENERIC;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
          && GET_MODE (XEXP (addr, 0)) == SImode)
        {
          addr = XEXP (addr, 0);
          if (CONST_INT_P (addr))
            return 0;
        }
      else if (GET_CODE (addr) == AND
               && const_32bit_mask (XEXP (addr, 1), DImode))
        {
          addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
          if (addr == NULL_RTX)
            return 0;

          if (CONST_INT_P (addr))
            return 0;
        }
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (SUBREG_P (addr)
          && GET_MODE (SUBREG_REG (addr)) == DImode)
        {
          addr = SUBREG_REG (addr);
          if (CONST_INT_P (addr))
            return 0;
        }
    }

  if (REG_P (addr))
    base = addr;
  else if (SUBREG_P (addr))
    {
      if (REG_P (SUBREG_REG (addr)))
        base = addr;
      else
        return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case ASHIFT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              tmp = XEXP (op, 1);
              if (!CONST_INT_P (tmp))
                return 0;
              scale = INTVAL (tmp);
              if ((unsigned HOST_WIDE_INT) scale > 3)
                return 0;
              scale = 1 << scale;
              break;

            case ZERO_EXTEND:
              op = XEXP (op, 0);
              if (GET_CODE (op) != UNSPEC)
                return 0;
              /* FALLTHRU */

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == ADDR_SPACE_GENERIC)
                seg = DEFAULT_TLS_SEG_REG;
              else
                return 0;
              break;

            case SUBREG:
              if (!REG_P (SUBREG_REG (op)))
                return 0;
              /* FALLTHRU */

            case REG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */

  if (index)
    {
      if (REG_P (index))
        ;
      else if (SUBREG_P (index)
               && REG_P (SUBREG_REG (index)))
        ;
      else
        return 0;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
        return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (REGNO (index_reg) == ARG_POINTER_REGNUM
          || REGNO (index_reg) == FRAME_POINTER_REGNUM
          || REGNO (index_reg) == SP_REG))
    {
      std::swap (base, index);
      std::swap (base_reg, index_reg);
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp && base_reg
      && (REGNO (base_reg) == ARG_POINTER_REGNUM
          || REGNO (base_reg) == FRAME_POINTER_REGNUM
          || REGNO (base_reg) == BP_REG
          || REGNO (base_reg) == R13_REG))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
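/* Example: (plus (plus (mult (reg A) (const_int 4)) (reg B))
            (const_int 12))
   decomposes to out->base = B, out->index = A, out->scale = 4 and
   out->disp = 12, i.e. the operand of "12(%B,%A,4)".  */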
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

static int
ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && SUBREG_P (parts.base))
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && SUBREG_P (parts.index))
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address by increasing
     address cost for each used register.  We don't increase address cost
     for "pic_offset_table_rtx".  When a memop with "pic_offset_table_rtx"
     is not invariant itself it most likely means that base or index is not
     invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
     which is not profitable for x86.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
          || !pic_offset_table_rtx
          || !REG_P (parts.base)
          || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
    cost++;

  if (parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
          || !pic_offset_table_rtx
          || !REG_P (parts.index)
          || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
       [base+scale*index]
       [scale*index+disp]
       [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */
  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
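/* For illustration only: each pseudo-register appearing as base or index
   adds 1 to the base cost of 1, so "(pseudo1)" costs 2 and
   "(pseudo1,pseudo2,2)" costs 3, steering the optimizers toward
   addresses that keep fewer pseudos live.  */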
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
          && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}

/* True if operand X should be loaded from GOT.  */

bool
ix86_force_load_from_GOT_p (rtx x)
{
  return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
          && !TARGET_PECOFF && !TARGET_MACHO
          && !flag_plt && !flag_pic
          && ix86_cmodel != CM_LARGE
          && GET_CODE (x) == SYMBOL_REF
          && SYMBOL_REF_FUNCTION_P (x)
          && !SYMBOL_REF_LOCAL_P (x));
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Pointer bounds constants are not valid.  */
  if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
    return false;

  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (!CONST_INT_P (XEXP (x, 1)))
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
          && SYMBOL_REF_DLLIMPORT_P (x))
        return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
        return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
         via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
        return false;

      break;

    CASE_CONST_SCALAR_INT:
      switch (mode)
        {
        case E_TImode:
          if (TARGET_64BIT)
            return true;
          /* FALLTHRU */
        case E_OImode:
        case E_XImode:
          if (!standard_sse_constant_p (x, mode))
            return false;
        default:
          break;
        }
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x, mode))
        return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (machine_mode mode, rtx x)
{
  /* We can put any immediate constant in memory.  */
  switch (GET_CODE (x))
    {
    CASE_CONST_ANY:
      return false;

    default:
      break;
    }

  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   otherwise zero.  */

static bool
is_imported_p (rtx x)
{
  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
      || GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && CONST_INT_P (XEXP (inner, 1)))
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_MACHOPIC_OFFSET:
            return legitimate_pic_address_disp_p (x);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1))
            break;
          if (GET_CODE (op0) == UNSPEC
              && (XINT (op0, 1) == UNSPEC_DTPOFF
                  || XINT (op0, 1) == UNSPEC_NTPOFF)
              && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
            return true;
          if (INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.
             The dllimported symbol needs always to be resolved.  */
          if (SYMBOL_REF_TLS_MODEL (op0)
              || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
                  && SYMBOL_REF_DLLIMPORT_P (op0)))
            return false;

          if (TARGET_PECOFF)
            {
              if (is_imported_p (op0))
                return true;

              if (SYMBOL_REF_FAR_ADDR_P (op0)
                  || !SYMBOL_REF_LOCAL_P (op0))
                break;

              /* Function-symbols need to be resolved only for
                 the large-model.
                 For the small-model we don't need to resolve anything
                 here.  */
              if ((ix86_cmodel != CM_LARGE_PIC
                   && SYMBOL_REF_FUNCTION_P (op0))
                  || ix86_cmodel == CM_SMALL_PIC)
                return true;
              /* Non-external symbols don't need to be resolved for
                 large, and medium-model.  */
              if ((ix86_cmodel == CM_LARGE_PIC
                   || ix86_cmodel == CM_MEDIUM_PIC)
                  && !SYMBOL_REF_EXTERNAL_P (op0))
                return true;
            }
          else if (!SYMBOL_REF_FAR_ADDR_P (op0)
                   && (SYMBOL_REF_LOCAL_P (op0)
                       || (HAVE_LD_PIE_COPYRELOC
                           && flag_pie
                           && !SYMBOL_REF_WEAK (op0)
                           && !SYMBOL_REF_FUNCTION_P (op0)))
                   && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this limits the allowed
         distance of GOT table references.  We should not need these
         anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
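/* Example of an accepted 32-bit PIC displacement:
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)), which prints as
   x@GOTOFF and is used relative to the PIC register.  */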
/* Determine if op is suitable RTX for an address register.
   Return naked register if a register or a register subreg is
   found, otherwise return NULL_RTX.  */

static rtx
ix86_validate_address_register (rtx op)
{
  machine_mode mode = GET_MODE (op);

  /* Only SImode or DImode registers can form the address.  */
  if (mode != SImode && mode != DImode)
    return NULL_RTX;

  if (REG_P (op))
    return op;
  else if (SUBREG_P (op))
    {
      rtx reg = SUBREG_REG (op);

      if (!REG_P (reg))
        return NULL_RTX;

      mode = GET_MODE (reg);

      /* Don't allow SUBREGs that span more than a word.  It can
         lead to spill failures when the register is one word out
         of a two word structure.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return NULL_RTX;

      /* Allow only SUBREGs of non-eliminable hard registers.  */
      if (register_no_elim_operand (reg, mode))
        return reg;
    }

  /* Op is not a register.  */
  return NULL_RTX;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (base);

      if (reg == NULL_RTX)
        return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (index);

      if (reg == NULL_RTX)
        return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
          || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
             when used.  While the ABI also specifies 32bit relocations, we
             don't produce them at all and use IP relative instead.
             Allow GOT in 32bit mode for both PIC and non-PIC if symbol
             should be loaded via GOT.  */
          case UNSPEC_GOT:
            if (!TARGET_64BIT
                && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
              goto is_legitimate_pic;
            /* FALLTHRU */
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
            if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
              goto is_legitimate_pic;
            /* FALLTHRU */
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
                   || (TARGET_MACHO
#if TARGET_MACHO
                       && MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp)
#endif
               )))
        {

        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                   && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* Displacement must be referenced via non_lazy_pointer.  */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is good idea, unfortunately these constructs may
             be created by "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing
             GOT table with pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by
             "addsi3" pattern, that later gets split to lea in the
             case output register differs from input.  While this
             can be handled by separate addsi pattern for this case
             that never results in lea, this seems to be easier and
             correct fix for crash to disable this test.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
         we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
               && CONST_INT_P (disp)
               && val_signbit_known_set_p (SImode, INTVAL (disp)))
        return false;
    }

  /* Everything looks valid.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
        return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
            || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
           && !TARGET_PECOFF
           && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
         from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);

      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
        new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      if (reg != 0)
        {
          gcc_assert (REG_P (reg));
          new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
                                         new_rtx, reg, 1, OPTAB_DIRECT);
        }
      else
        new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
           /* We can't use @GOTOFF for text labels
              on VxWorks, see gotoff_operand.  */
           || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
        return tmp;

      /* For x64 PE-COFF there is no GOT table,
         so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
        }
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
                                    UNSPEC_GOTPCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
        }
      else
        {
          /* This symbol must be referenced via a load
             from the Global Offset Table (@GOT).  */
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          if (TARGET_64BIT)
            new_rtx = force_reg (Pmode, new_rtx);
          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
        }

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      if (CONST_INT_P (addr)
          && !x86_64_immediate_operand (addr, VOIDmode))
        new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }

      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant
             offset from a @GOTOFF symbol reference.  */
          if (!TARGET_PECOFF
              && gotoff_operand (op0, Pmode)
              && CONST_INT_P (op1))
            {
              if (!TARGET_64BIT)
                {
                  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                            UNSPEC_GOTOFF);
                  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
                  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

                  if (reg != 0)
                    {
                      gcc_assert (REG_P (reg));
                      new_rtx = expand_simple_binop (Pmode, PLUS,
                                                     pic_offset_table_rtx,
                                                     new_rtx, reg, 1,
                                                     OPTAB_DIRECT);
                    }
                  else
                    new_rtx
                      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);

                      new_rtx
                        = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              rtx base = legitimize_pic_address (op0, reg);
              machine_mode mode = GET_MODE (base);
              new_rtx
                = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

              if (CONST_INT_P (new_rtx))
                {
                  if (INTVAL (new_rtx) < -16*1024*1024
                      || INTVAL (new_rtx) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (new_rtx, mode))
                        new_rtx = force_reg (mode, new_rtx);

                      new_rtx
                        = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
                    }
                  else
                    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
                }
              else
                {
                  /* For %rip addressing, we have to use
                     just disp32, not base nor index.  */
                  if (TARGET_64BIT
                      && (GET_CODE (base) == SYMBOL_REF
                          || GET_CODE (base) == LABEL_REF))
                    base = force_reg (mode, base);
                  if (GET_CODE (new_rtx) == PLUS
                      && CONSTANT_P (XEXP (new_rtx, 1)))
                    {
                      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
                      new_rtx = XEXP (new_rtx, 1);
                    }
                  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
                }
            }
        }
    }
  return new_rtx;
}
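/* For illustration only, the two reference types from the comment above,
   as emitted for 32-bit PIC code:
       movl foo@GOT(%ebx), %reg     ; 1. global data: load address from GOT
       leal bar@GOTOFF(%ebx), %reg  ; 2. local data: PIC reg + offset  */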
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);
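
  return tp;
}

/* The UNSPEC_TP built above materializes as a thread-pointer read such as
   "movl %gs:0, %reg" (32-bit Linux) or "movq %fs:0, %reg" (64-bit); the
   exact segment comes from DEFAULT_TLS_SEG_REG.  */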
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
        = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
           ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
                                   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                           gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
        = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
        |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic && !TARGET_PECOFF)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

          tp = get_thread_pointer (Pmode, true);
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

          if (GET_MODE (x) != Pmode)
            x = gen_rtx_ZERO_EXTEND (Pmode, x);

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG);
              rtx_insn *insns;

              start_sequence ();
              emit_call_insn
                (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
              insns = get_insns ();
              end_sequence ();

              if (GET_MODE (x) != Pmode)
                x = gen_rtx_ZERO_EXTEND (Pmode, x);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, dest, rax, x);
            }
          else
            emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
        }
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          rtx tmp = ix86_tls_module_base ();

          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

          tp = get_thread_pointer (Pmode, true);
          set_unique_reg_note (get_last_insn (), REG_EQUAL,
                               gen_rtx_MINUS (Pmode, tmp, tp));
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG);
              rtx_insn *insns;
              rtx eqv;

              start_sequence ();
              emit_call_insn
                (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
              insns = get_insns ();
              end_sequence ();

              /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
                 share the LD_BASE result with other LD model accesses.  */
              eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                    UNSPEC_TLS_LD_BASE);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, base, rax, eqv);
            }
          else
            emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
        {
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

          if (GET_MODE (x) != Pmode)
            x = gen_rtx_ZERO_EXTEND (Pmode, x);

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          if (TARGET_SUN_TLS && !TARGET_X32)
            {
              /* The Sun linker took the AMD64 TLS spec literally
                 and can only handle %rax as destination of the
                 initial executable code sequence.  */

              dest = gen_reg_rtx (DImode);
              emit_insn (gen_tls_initial_exec_64_sun (dest, x));
              return dest;
            }

          /* Generate DImode references to avoid %fs:(%reg32)
             problems and linker IE->LE relaxation bug.  */
          tp_mode = DImode;
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
        off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (tp_mode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (tp_mode, off);
          dest = gen_rtx_PLUS (tp_mode, base, off);
          if (tp_mode != Pmode)
            dest = convert_to_mode (Pmode, dest, 1);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (ix86_gen_sub3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (Pmode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (ix86_gen_sub3 (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
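    }

  return dest;
}

/* For illustration only, the classic 64-bit Linux sequences the models
   above expand to:
       local exec:     movq %fs:0, %rax;  leaq x@tpoff(%rax), %rax
       initial exec:   movq x@gottpoff(%rip), %rcx;  then access %fs:(%rcx)
       global dynamic: leaq x@tlsgd(%rip), %rdi;  call __tls_get_addr  */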
/* Return true if OP refers to a TLS address.  */

bool
ix86_tls_address_pattern_p (rtx op)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
    {
      rtx op = *iter;
      if (MEM_P (op))
        {
          rtx *x = &XEXP (op, 0);
          while (GET_CODE (*x) == PLUS)
            {
              int i;
              for (i = 0; i < 2; i++)
                {
                  rtx u = XEXP (*x, i);
                  if (GET_CODE (u) == ZERO_EXTEND)
                    u = XEXP (u, 0);
                  if (GET_CODE (u) == UNSPEC
                      && XINT (u, 1) == UNSPEC_TP)
                    return true;
                }
              x = &XEXP (*x, 0);
            }

          iter.skip_subrtxes ();
        }
    }

  return false;
}

/* Rewrite *LOC so that it refers to a default TLS address space.  */

static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
        {
          rtx addr = XEXP (*loc, 0);
          rtx *x = &addr;
          while (GET_CODE (*x) == PLUS)
            {
              int i;
              for (i = 0; i < 2; i++)
                {
                  rtx u = XEXP (*x, i);
                  if (GET_CODE (u) == ZERO_EXTEND)
                    u = XEXP (u, 0);
                  if (GET_CODE (u) == UNSPEC
                      && XINT (u, 1) == UNSPEC_TP)
                    {
                      addr_space_t as = DEFAULT_TLS_SEG_REG;

                      *x = XEXP (*x, 1 - i);

                      *loc = replace_equiv_address_nv (*loc, addr, true);
                      set_mem_addr_space (*loc, as);
                      return;
                    }
                }
              x = &XEXP (*x, 0);
            }

          iter.skip_subrtxes ();
        }
    }
}

/* Rewrite instruction pattern involving TLS address
   so that it refers to a default TLS address space.  */

rtx
ix86_rewrite_tls_address (rtx pattern)
{
  pattern = copy_insn (pattern);
  ix86_rewrite_tls_address_1 (&pattern);
  return pattern;
}
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */

struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
{
  static inline hashval_t hash (tree_map *m) { return m->hash; }

  static inline bool
  equal (tree_map *a, tree_map *b)
  {
    return a->base.from == b->base.from;
  }

  static int
  keep_cache_entry (tree_map *&m)
  {
    return ggc_marked_p (m->base.from);
  }
};

static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
  h = *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc<tree_map> ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
                           VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
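
  return to;
}

/* Example: for a dllimport symbol "foo" this builds "__imp_foo" (or
   "__imp__foo" when user labels carry a leading underscore), a
   pointer-sized read-only variable whose value the dynamic linker
   fills in at load time.  */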
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_symbol (rtx addr, bool inreg)
{
  if (!TARGET_PECOFF)
    return NULL_RTX;

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
        return legitimize_dllimport_symbol (addr, inreg);
      if (GET_CODE (addr) == CONST
          && GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
          && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
        {
          rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
                                               inreg);
          return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
        }
    }

  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
    return NULL_RTX;
  if (GET_CODE (addr) == SYMBOL_REF
      && !is_imported_p (addr)
      && SYMBOL_REF_EXTERNAL_P (addr)
      && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);

  if (GET_CODE (addr) == CONST
      && GET_CODE (XEXP (addr, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
    {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0),
                                              inreg);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
    }
  return NULL_RTX;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
                                      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
        return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }
16472 if (GET_CODE (x
) == PLUS
)
16474 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16476 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
16477 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
16478 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
16481 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
16482 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
16483 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
16484 GEN_INT (1 << log
));
16487 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
16488 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
16489 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
16492 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
16493 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
16494 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
16495 GEN_INT (1 << log
));
16498 /* Put multiply first if it isn't already. */
16499 if (GET_CODE (XEXP (x
, 1)) == MULT
)
16501 std::swap (XEXP (x
, 0), XEXP (x
, 1));
16505 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
16506 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
16507 created by virtual register instantiation, register elimination, and
16508 similar optimizations. */
16509 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
16512 x
= gen_rtx_PLUS (Pmode
,
16513 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
16514 XEXP (XEXP (x
, 1), 0)),
16515 XEXP (XEXP (x
, 1), 1));
16519 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
16520 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
16521 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
16522 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
16523 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
16524 && CONSTANT_P (XEXP (x
, 1)))
16527 rtx other
= NULL_RTX
;
16529 if (CONST_INT_P (XEXP (x
, 1)))
16531 constant
= XEXP (x
, 1);
16532 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
16534 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
16536 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
16537 other
= XEXP (x
, 1);
16545 x
= gen_rtx_PLUS (Pmode
,
16546 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
16547 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
16548 plus_constant (Pmode
, other
,
16549 INTVAL (constant
)));
16553 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
16556 if (GET_CODE (XEXP (x
, 0)) == MULT
)
16559 XEXP (x
, 0) = copy_addr_to_reg (XEXP (x
, 0));
16562 if (GET_CODE (XEXP (x
, 1)) == MULT
)
16565 XEXP (x
, 1) = copy_addr_to_reg (XEXP (x
, 1));
16569 && REG_P (XEXP (x
, 1))
16570 && REG_P (XEXP (x
, 0)))
16573 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
16576 x
= legitimize_pic_address (x
, 0);
16579 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
16582 if (REG_P (XEXP (x
, 0)))
16584 rtx temp
= gen_reg_rtx (Pmode
);
16585 rtx val
= force_operand (XEXP (x
, 1), temp
);
16588 val
= convert_to_mode (Pmode
, val
, 1);
16589 emit_move_insn (temp
, val
);
16592 XEXP (x
, 1) = temp
;
16596 else if (REG_P (XEXP (x
, 1)))
16598 rtx temp
= gen_reg_rtx (Pmode
);
16599 rtx val
= force_operand (XEXP (x
, 0), temp
);
16602 val
= convert_to_mode (Pmode
, val
, 1);
16603 emit_move_insn (temp
, val
);
16606 XEXP (x
, 0) = temp
;
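/* Illustrative sketch (guarded out, not compiled): the canonicalization
   above relies on the identity (x << n) == x * (1 << n) for n in
   {0, 1, 2, 3}, which lets a shifted index be expressed with the scale
   factors 1, 2, 4 and 8 that the SIB addressing byte can encode.
   Standalone demo, not part of GCC.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
shift_to_scale_demo (void)
{
  uint64_t x = 42;

  for (int log = 0; log < 4; log++)
    /* The MULT form generated above denotes the same address term.  */
    assert ((x << log) == x * ((uint64_t) 1 << log));
}
#endif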
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else if (!REG_P (x))
    return false;
  else if (pic_offset_table_rtx)
    {
      if (REGNO (x) == REGNO (pic_offset_table_rtx))
	return true;
      if (HARD_REGISTER_P (x)
	  && !HARD_REGISTER_P (pic_offset_table_rtx)
	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
	return true;
      return false;
    }
  else
    return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
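/* Illustrative sketch (guarded out, not compiled): the local-exec TLS
   access this helper reverses is, at the source level, just a __thread
   variable reference; on x86 Linux targets it is typically addressed as a
   fixed offset from the thread segment base (the UNSPEC_NTPOFF
   displacement recognized above).  Hypothetical user code and assembly,
   not part of GCC.  */
#if 0
static __thread int counter;

static int
bump (void)
{
  /* On x86-64 this commonly compiles to something like:
       movl %fs:counter@tpoff, %eax  */
  return ++counter;
}
#endif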
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.

   This helper is used in the normal ix86_delegitimize_address
   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As compile time memory optimization, we
   avoid allocating rtxes that will not change anything on the outcome
   of the callers (find_base_value and find_base_term).  */

static rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base.  A CONST can't be arg_pointer_rtx based.  */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed.  Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_RTP)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
/* The normal instantiation of the above template.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  return ix86_delegitimize_address_1 (x, false);
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && CONST_INT_P (XEXP (term, 1)))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address_1 (x, true);
}
/* Return true if X shouldn't be emitted into the debug info.
   Disallow UNSPECs other than @gotoff - we can't emit the
   _GLOBAL_OFFSET_TABLE_ symbol easily into the .debug_info section,
   so we do not delegitimize, but instead assemble as @gotoff.
   Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */

static bool
ix86_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
    return true;

  if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
    return true;

  return false;
}
static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "a";
	  break;
	case E_CCCmode:
	  suffix = "c";
	  break;
	case E_CCOmode:
	  suffix = "o";
	  break;
	case E_CCPmode:
	  suffix = "p";
	  break;
	case E_CCSmode:
	  suffix = "s";
	  break;
	default:
	  suffix = "e";
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "na";
	  break;
	case E_CCCmode:
	  suffix = "nc";
	  break;
	case E_CCOmode:
	  suffix = "no";
	  break;
	case E_CCPmode:
	  suffix = "np";
	  break;
	case E_CCSmode:
	  suffix = "ns";
	  break;
	default:
	  suffix = "ne";
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "s";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b";
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c";
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "ns";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc";
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;
  unsigned int regno;
  bool duplicated;

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }

  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG
      || regno == FPCR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      if (LEGACY_INT_REGNO_P (regno))
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (msize == 32 ? 'y' : 'z', file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]"  */
  if (REX_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print embedded rounding and sae.
   r -- print only sae.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   g -- likewise, print the V16SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
   ! -- print MPX prefix for jxx/call/ret instructions if required.
 */

static void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (!REG_P (x))
		{
		  putc ('[', file);
		  ix86_print_operand (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  ix86_print_operand (file, x, 0);
	  return;

	case 'E':
	  /* Wrap address in an UNSPEC to declare special handling.  */
	  if (TARGET_64BIT)
	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

	  output_address (VOIDmode, x);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT != ASM_ATT)
	    return;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
	      putc ('w', file);
	      break;

	    case 4:
	      putc ('l', file);
	      break;

	    case 8:
	      putc ('q', file);
	      break;

	    default:
	      output_operand_lossage ("invalid operand size for operand "
				      "code 'O'");
	      return;
	    }

	  putc ('.', file);
#endif
	  return;

	case 'z':
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 1:
		  putc ('b', file);
		  return;

		case 2:
		  putc ('w', file);
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
		  putc ('q', file);
		  return;

		default:
		  output_operand_lossage ("invalid operand size for operand "
					  "code 'z'");
		  return;
		}
	    }

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    warning (0, "non-integer operand used with operand code 'z'");
	  /* FALLTHRU */

	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 2:
#ifdef HAVE_AS_IX86_FILDS
		  putc ('s', file);
#endif
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
#ifdef HAVE_AS_IX86_FILDQ
		  putc ('q', file);
#else
		  fputs ("ll", file);
#endif
		  return;

		default:
		  break;
		}
	    }
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 4:
		  putc ('s', file);
		  return;

		case 8:
		  putc ('l', file);
		  return;

		case 12:
		case 16:
		  putc ('t', file);
		  return;

		default:
		  break;
		}
	    }
	  else
	    {
	      output_operand_lossage ("invalid operand type used with "
				      "operand code 'Z'");
	      return;
	    }

	  output_operand_lossage ("invalid operand size for operand code 'Z'");
	  return;

	case 'd':
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 't':
	case 'g':
	case 'y':
	case 'x':
	case 'X':
	case 'P':
	case 'p':
	  break;

	case 's':
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      ix86_print_operand (file, x, 0);
	      fputs (", ", file);
	    }
	  return;

	case 'Y':
	  switch (GET_CODE (x))
	    {
	    case NE:
	      fputs ("neq", file);
	      break;
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case GE:
	    case GEU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
	      break;
	    case GT:
	    case GTU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
	      break;
	    case LE:
	    case LEU:
	      fputs ("le", file);
	      break;
	    case LT:
	    case LTU:
	      fputs ("lt", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    case UNEQ:
	      fputs ("ueq", file);
	      break;
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case UNLE:
	      fputs ("ule", file);
	      break;
	    case UNLT:
	      fputs ("ult", file);
	      break;
	    case LTGT:
	      fputs ("une", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");
	      return;
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
	  switch (GET_CODE (x))
	    {
	    case UNEQ:
	      if (TARGET_AVX)
		{
		  fputs ("eq_us", file);
		  break;
		}
	      /* FALLTHRU */
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case UNLT:
	      if (TARGET_AVX)
		{
		  fputs ("nge", file);
		  break;
		}
	      /* FALLTHRU */
	    case LT:
	      fputs ("lt", file);
	      break;
	    case UNLE:
	      if (TARGET_AVX)
		{
		  fputs ("ngt", file);
		  break;
		}
	      /* FALLTHRU */
	    case LE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	      if (TARGET_AVX)
		{
		  fputs ("neq_oq", file);
		  break;
		}
	      /* FALLTHRU */
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case GE:
	      if (TARGET_AVX)
		{
		  fputs ("ge", file);
		  break;
		}
	      /* FALLTHRU */
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case GT:
	      if (TARGET_AVX)
		{
		  fputs ("gt", file);
		  break;
		}
	      /* FALLTHRU */
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'D'");
	      return;
	    }
	  return;

	case 'F':
	case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
	  gcc_fallthrough ();
#endif

	case 'C':
	case 'c':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code '%c'", code);
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      code == 'c' || code == 'f',
			      code == 'F' || code == 'f',
			      file);
	  return;

	case 'H':
	  if (!offsettable_memref_p (x))
	    {
	      output_operand_lossage ("operand is not an offsettable memory "
				      "reference, invalid operand code 'H'");
	      return;
	    }
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  /* Output 'qword ptr' for intel assembler dialect.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    code = 'q';
	  break;

	case 'K':
	  if (!CONST_INT_P (x))
	    {
	      output_operand_lossage ("operand is not an integer, invalid "
				      "operand code 'K'");
	      return;
	    }

	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xacquire ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xrelease ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
	  /* We do not want to print value of the operand.  */
	  return;

	case 'N':
	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
	    fputs ("{z}", file);
	  return;

	case 'r':
	  if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
	    {
	      output_operand_lossage ("operand is not a specific integer, "
				      "invalid operand code 'r'");
	      return;
	    }

	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    fputs (", ", file);

	  fputs ("{sae}", file);

	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    fputs (", ", file);

	  return;

	case 'R':
	  if (!CONST_INT_P (x))
	    {
	      output_operand_lossage ("operand is not an integer, invalid "
				      "operand code 'R'");
	      return;
	    }

	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    fputs (", ", file);

	  switch (INTVAL (x))
	    {
	    case ROUND_NEAREST_INT | ROUND_SAE:
	      fputs ("{rn-sae}", file);
	      break;
	    case ROUND_NEG_INF | ROUND_SAE:
	      fputs ("{rd-sae}", file);
	      break;
	    case ROUND_POS_INF | ROUND_SAE:
	      fputs ("{ru-sae}", file);
	      break;
	    case ROUND_ZERO | ROUND_SAE:
	      fputs ("{rz-sae}", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a specific integer, "
				      "invalid operand code 'R'");
	    }

	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    fputs (", ", file);

	  return;

	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  {
	    const char *name = get_some_local_dynamic_name ();
	    if (name == NULL)
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    else
	      assemble_name (file, name);
	    return;
	  }

	case '+':
	  {
	    rtx x;

	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = profile_probability::from_reg_br_prob_note
				 (XINT (x, 0)).to_reg_br_prob_base ();

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    bool cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }

	case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  putc (';', file);
#endif
	  return;

	case '~':
	  putc (TARGET_AVX2 ? 'i' : 'f', file);
	  return;

	case '^':
	  if (TARGET_64BIT && Pmode != word_mode)
	    fputs ("addr32 ", file);
	  return;

	case '!':
	  if (ix86_bnd_prefixed_insn_p (current_output_insn))
	    fputs ("bnd ", file);
	  if (ix86_notrack_prefixed_insn_p (current_output_insn))
	    fputs ("notrack ", file);
	  return;

	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      rtx addr = XEXP (x, 0);

      /* No `byte ptr' prefix for call instructions ... */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  machine_mode mode = GET_MODE (x);
	  const char *size;

	  /* Check for explicit size override codes.  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";
	  else if (code == 'q')
	    size = "QWORD";
	  else if (code == 'x')
	    size = "XMMWORD";
	  else if (code == 't')
	    size = "YMMWORD";
	  else if (code == 'g')
	    size = "ZMMWORD";
	  else if (mode == BLKmode)
	    /* ... or BLKmode operands, when not overridden.  */
	    size = NULL;
	  else
	    switch (GET_MODE_SIZE (mode))
	      {
	      case 1: size = "BYTE"; break;
	      case 2: size = "WORD"; break;
	      case 4: size = "DWORD"; break;
	      case 8: size = "QWORD"; break;
	      case 12: size = "TBYTE"; break;
	      case 16:
		if (mode == XFmode)
		  size = "TBYTE";
		else
		  size = "XMMWORD";
		break;
	      case 32: size = "YMMWORD"; break;
	      case 64: size = "ZMMWORD"; break;
	      default:
		gcc_unreachable ();
	      }
	  if (size)
	    {
	      fputs (size, file);
	      fputs (" PTR ", file);
	    }
	}

      if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	ix86_print_operand_address_as
	  (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
    }

  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
    {
      long l;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
		 (unsigned long long) (int) l);
      else
	fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
    {
      long l[2];

      REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P' && code != 'p')
	{
	  if (CONST_INT_P (x))
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
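/* Illustrative sketch (guarded out, not compiled): the '+' case above only
   emits a ds/cs branch-hint prefix when the probability note leaves the
   45%..55% dead zone around an even split.  Standalone recomputation of
   that test; DEMO_BR_PROB_BASE is a hypothetical stand-in for
   REG_BR_PROB_BASE, and is not part of GCC.  */
#if 0
#include <stdbool.h>

#define DEMO_BR_PROB_BASE 10000

static bool
wants_branch_hint (int pred_val)
{
  /* Hints are considered only for clearly biased branches; near-even
     probabilities fall inside the hysteresis band and get no prefix.  */
  return pred_val < DEMO_BR_PROB_BASE * 45 / 100
	 || pred_val > DEMO_BR_PROB_BASE * 55 / 100;
}
#endif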
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '*' || code == '+' || code == '&' || code == ';'
	  || code == '~' || code == '^' || code == '!');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address_as (FILE *file, rtx addr,
			       addr_space_t as, bool no_rip)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
      gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
      if (parts.base != NULL_RTX)
	{
	  parts.index = parts.base;
	  parts.scale = 1;
	}
      parts.base = XVECEXP (addr, 0, 0);
      addr = XVECEXP (addr, 0, 0);
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      addr = XVECEXP (addr, 0, 0);
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (ADDR_SPACE_GENERIC_P (as))
    as = parts.seg;
  else
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  if (!ADDR_SPACE_GENERIC_P (as))
    {
      const char *string;

      if (as == ADDR_SPACE_SEG_FS)
	string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
      else if (as == ADDR_SPACE_SEG_GS)
	string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
      else
	gcc_unreachable ();
      fputs (string, file);
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index && !no_rip)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	output_pic_addr_const (file, disp, 0);
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	{
	  if (flag_checking)
	    {
	      gcc_assert (TARGET_64BIT);
	      switch (GET_CODE (addr))
		{
		case SUBREG:
		  gcc_assert (GET_MODE (addr) == SImode);
		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
		  break;
		case ZERO_EXTEND:
		case AND:
		  gcc_assert (GET_MODE (addr) == DImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  gcc_assert (!code);
	  code = 'k';
	}
      else if (code == 0
	       && TARGET_X32
	       && disp
	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	{
	  /* X32 runs in 64-bit mode, where displacement, DISP, in
	     address DISP(%r64), is encoded as 32-bit immediate sign-
	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
	     which is invalid for x32.  The correct address is %r64
	     - 0x40000300 == 0xf7ffdd64.  To properly encode
	     -0x40000300(%r64) for x32, we zero-extend negative
	     displacement by forcing addr32 prefix which truncates
	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	     zero-extend all negative displacements, including -1(%rsp).
	     However, for small negative displacements, sign-extension
	     won't cause overflow.  We only zero-extend negative
	     displacements if they < -16*1024*1024, which is also used
	     to check legitimate address displacements for PIC.  */
	  code = 'k';
	}

      /* Since the upper 32 bits of RSP are always zero for x32,
	 we can encode %esp as %rsp to avoid 0x67 prefix if
	 there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
	code = 'q';

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
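/* Illustrative sketch (guarded out, not compiled): the x32 displacement
   fix-up above can be checked with plain integer arithmetic.  Sign-
   extending the 32-bit displacement -0x40000300 and adding it to a 64-bit
   base below 0x40000300 yields an address with the upper 32 bits set,
   which is invalid for x32; truncating to 32 bits (the effect of the
   addr32 prefix) recovers the intended address.  Standalone demo, not
   part of GCC.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
x32_disp_demo (void)
{
  int32_t disp = -0x40000300;
  uint64_t base = 0x37ffe064;

  /* 64-bit sign-extended addition: the upper half is all ones.  */
  uint64_t wide = base + (int64_t) disp;
  assert (wide == 0xfffffffff7ffdd64ull);

  /* addr32 truncates the effective address to 32 bits.  */
  uint32_t narrow = (uint32_t) wide;
  assert (narrow == 0xf7ffdd64u);
}
#endif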
static void
ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
{
  ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
}
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTOFF:
      output_addr_const (file, op);
      fputs ("@gotoff", file);
      break;
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case E_TImode:
      half_mode = DImode;
      break;
    case E_DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
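/* Illustrative sketch (guarded out, not compiled): for an integer constant
   the split performed above is equivalent to taking the low and high
   halves by masking and shifting.  Standalone demo for the DImode ->
   2 x SImode case, not part of GCC.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
split_demo (void)
{
  uint64_t op = 0x1122334455667788ull;

  uint32_t lo = (uint32_t) (op & 0xffffffffu);	/* lo_half */
  uint32_t hi = (uint32_t) (op >> 32);		/* hi_half */

  assert (lo == 0x55667788u);
  assert (hi == 0x11223344u);
}
#endif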
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx_insn *insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  bool is_sse
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));

  if (is_sse)
    p = "%v";
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
    p = "fi";
  else
    p = "f";

  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      p = "add";
      break;
    case MINUS:
      p = "sub";
      break;
    case MULT:
      p = "mul";
      break;
    case DIV:
      p = "div";
      break;
    default:
      gcc_unreachable ();
    }

  strcat (buf, p);

  if (is_sse)
    {
      p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
      strcat (buf, p);

      if (TARGET_AVX)
	p = "\t{%2, %1, %0|%0, %1, %2}";
      else
	p = "\t{%2, %0|%0, %2}";

      strcat (buf, p);
      return buf;
    }

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (flag_checking)
    {
      if (STACK_REG_P (operands[0])
	  && ((REG_P (operands[1])
	       && REGNO (operands[0]) == REGNO (operands[1])
	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	      || (REG_P (operands[2])
		  && REGNO (operands[0]) == REGNO (operands[2])
		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
	; /* ok */
      else
	gcc_unreachable ();
    }

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	std::swap (operands[1], operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_dirflag_mode_needed (rtx_insn *insn)
{
  if (CALL_P (insn))
    {
      if (cfun->machine->func_type == TYPE_NORMAL)
	return X86_DIRFLAG_ANY;
      else
	/* No need to emit CLD in interrupt handler for TARGET_CLD.  */
	return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
    }

  if (recog_memoized (insn) < 0)
    return X86_DIRFLAG_ANY;

  if (get_attr_type (insn) == TYPE_STR)
    {
      /* Emit cld instruction if stringops are used in the function.  */
      if (cfun->machine->func_type == TYPE_NORMAL)
	return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
      else
	return X86_DIRFLAG_RESET;
    }

  return X86_DIRFLAG_ANY;
}
/* Check if a 256bit or 512bit AVX register is referenced inside of EXP.  */

static bool
ix86_check_avx_upper_register (const_rtx exp)
{
  if (SUBREG_P (exp))
    exp = SUBREG_REG (exp);

  return (REG_P (exp)
	  && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))
	      || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp))));
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit or 512bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (arg))
		return AVX_U128_DIRTY;
	    }
	}

      return AVX_U128_CLEAN;
    }

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving optimal insertion
     point above eventual read from 256bit or 512bit register.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    if (ix86_check_avx_upper_register (*iter))
      return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
/* Return mode that i387 must be switched into
   prior to the execution of insn.  */

static int
ix86_i387_mode_needed (int entity, rtx_insn *insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Return mode that entity must be switched into
   prior to the execution of insn.  */

static int
ix86_mode_needed (int entity, rtx_insn *insn)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return ix86_dirflag_mode_needed (insn);
    case AVX_U128:
      return ix86_avx_u128_mode_needed (insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return ix86_i387_mode_needed (entity, insn);
    default:
      gcc_unreachable ();
    }
  return 0;
}
/* Check if a 256bit or 512bit AVX register is referenced in stores.  */

static void
ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
{
  if (ix86_check_avx_upper_register (dest))
    {
      bool *used = (bool *) data;
      *used = true;
    }
}
/* Calculate mode of upper 128bit AVX registers after the insn.  */

static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  if (vzeroupper_operation (pat, VOIDmode)
      || vzeroall_operation (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_upper_reg_found = false;
      note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */
  return mode;
}
/* Return the mode that an insn results in.  */

static int
ix86_mode_after (int entity, int mode, rtx_insn *insn)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return mode;
    case AVX_U128:
      return ix86_avx_u128_mode_after (mode, insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return mode;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_dirflag_mode_entry (void)
{
  /* For TARGET_CLD or in the interrupt handler we can't assume
     direction flag state at function entry.  */
  if (TARGET_CLD
      || cfun->machine->func_type != TYPE_NORMAL)
    return X86_DIRFLAG_ANY;

  return X86_DIRFLAG_RESET;
}
static int
ix86_avx_u128_mode_entry (void)
{
  tree arg;

  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit or 512bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
    {
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx_upper_register (incoming))
	return AVX_U128_DIRTY;
    }

  return AVX_U128_CLEAN;
}
/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

static int
ix86_mode_entry (int entity)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return ix86_dirflag_mode_entry ();
    case AVX_U128:
      return ix86_avx_u128_mode_entry ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_avx_u128_mode_exit (void)
{
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
     or 512bit modes used in the function return register.  */
  if (reg && ix86_check_avx_upper_register (reg))
    return AVX_U128_DIRTY;

  return AVX_U128_CLEAN;
}
/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

static int
ix86_mode_exit (int entity)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return X86_DIRFLAG_ANY;
    case AVX_U128:
      return ix86_avx_u128_mode_exit ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_mode_priority (int, int n)
{
  return n;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_insn_for_size_p ())
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
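/* Illustrative sketch (guarded out, not compiled): bits 10-11 of the x87
   control word select the rounding mode, which is why the sequences above
   OR in 0x0c00 (truncate) or clear the field and set 0x0400 (floor) or
   0x0800 (ceil), and why bit 5 (0x0020) masks the precision exception.
   Standalone recomputation of the bit patterns, not part of GCC.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
x87_cw_demo (void)
{
  uint16_t cw = 0x037f;		/* power-on default control word */

  uint16_t trunc  = cw | 0x0c00;		   /* RC = 11: toward zero */
  uint16_t floor_ = (cw & ~0x0c00) | 0x0400;	   /* RC = 01: toward -oo */
  uint16_t ceil_  = (cw & ~0x0c00) | 0x0800;	   /* RC = 10: toward +oo */

  assert (((trunc  >> 10) & 3) == 3);
  assert (((floor_ >> 10) & 3) == 1);
  assert (((ceil_  >> 10) & 3) == 2);
}
#endif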
/* Emit vzeroupper.  */

static void
ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
{
  int i;

  /* Cancel automatic vzeroupper insertion if there are
     live call-saved SSE registers at the insertion point.  */

  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
      return;

  if (TARGET_64BIT)
    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
      if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
	return;

  emit_insn (gen_avx_vzeroupper ());
}
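/* Illustrative sketch (guarded out, not compiled): the same transition
   this machinery manages automatically can be requested by hand with the
   intrinsic _mm256_zeroupper(), which maps to vzeroupper and leaves the
   "dirty upper" AVX state before legacy-SSE code runs.  Hypothetical user
   code, not part of GCC.  */
#if 0
#include <immintrin.h>

extern void legacy_sse_code (void);

static void
mixed_avx_sse (float *dst, const float *a, const float *b)
{
  __m256 va = _mm256_loadu_ps (a);
  __m256 vb = _mm256_loadu_ps (b);
  _mm256_storeu_ps (dst, _mm256_add_ps (va, vb));

  /* Clear the upper halves of the ymm registers to avoid SSE/AVX
     transition penalties in the legacy code that follows.  */
  _mm256_zeroupper ();
  legacy_sse_code ();
}
#endif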
/* Generate one or more insns to set ENTITY to MODE.  HARD_REG_LIVE
   is the set of hard registers live at the point where the insn(s)
   are to be inserted.  */

static void
ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
		    HARD_REG_SET regs_live)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      if (mode == X86_DIRFLAG_RESET)
	emit_insn (gen_cld ());
      break;
    case AVX_U128:
      if (mode == AVX_U128_CLEAN)
	ix86_avx_emit_vzeroupper (regs_live);
      break;
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
  bool dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  static char buf[40];
  const char *p;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    return "fisttp%Z0\t%0";

  strcpy (buf, "fist");

  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%3", operands);

  p = "p%Z0\t%0";
  strcat (buf, p + !(stack_top_dies || dimode_p));

  output_asm_insn (buf, operands);

  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%2", operands);

  return "";
}
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
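/* Encoding note: "ffreep %st(N)" is the two-byte sequence df c0+N, so
   for N == 2 the ASM_SHORT directive above emits the 16-bit word 0xc2df,
   which stores (little endian) as the bytes 0xdf 0xc2.  */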
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

static const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
                   bool eflags_p, bool unordered_p)
{
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      p = unordered_p ? "fucomi" : "fcomi";
      strcpy (buf, p);

      p = "p\t{%y1, %0|%0, %y1}";
      strcat (buf, p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp";
      strcpy (buf, p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      gcc_assert (!unordered_p);
      strcpy (buf, "ftst");
    }
  else
    {
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
        {
          gcc_assert (!unordered_p);
          p = "ficom";
        }
      else
        p = unordered_p ? "fucom" : "fcom";

      strcpy (buf, p);

      p = "p%Z2\t%y2";
      strcat (buf, p + !stack_top_dies);
    }

  output_asm_insn (buf, operands);
  return "fnstsw\t%0";
}
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
             directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
                 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (dest, const0_rtx);

  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
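/* For example, clearing %eax becomes "xorl %eax, %eax" (two bytes,
   clobbers the flags -- hence the CLOBBER attached above) instead of the
   five-byte "movl $0, %eax"; the mov form survives only on targets where
   TARGET_USE_MOV0 says the xor idiom is slow.  */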
void
ix86_expand_move (machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  rtx tmp, addend = NULL_RTX;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  switch (GET_CODE (op1))
    {
    case CONST:
      tmp = XEXP (op1, 0);

      if (GET_CODE (tmp) != PLUS
          || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
        break;

      op1 = XEXP (tmp, 0);
      addend = XEXP (tmp, 1);
      /* FALLTHRU */

    case SYMBOL_REF:
      model = SYMBOL_REF_TLS_MODEL (op1);

      if (model)
        op1 = legitimize_tls_address (op1, model, true);
      else if (ix86_force_load_from_GOT_p (op1))
        {
          /* Load the external function address via GOT slot to avoid PLT.  */
          op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
                                (TARGET_64BIT
                                 ? UNSPEC_GOTPCREL
                                 : UNSPEC_GOT));
          op1 = gen_rtx_CONST (Pmode, op1);
          op1 = gen_const_mem (Pmode, op1);
          set_mem_alias_set (op1, ix86_GOT_alias_set ());
        }
      else
        {
          tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
          if (tmp)
            {
              op1 = tmp;
              if (!addend)
                break;
            }
          else
            {
              op1 = operands[1];
              break;
            }
        }

      if (addend)
        {
          op1 = force_operand (op1, NULL_RTX);
          op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
                                     op0, 1, OPTAB_DIRECT);
        }
      else
        op1 = force_operand (op1, op0);

      if (op1 == op0)
        return;

      op1 = convert_to_mode (mode, op1, 1);

    default:
      break;
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
        {
          /* dynamic-no-pic */
          if (MACHOPIC_INDIRECT)
            {
              rtx temp = (op0 && REG_P (op0) && mode == Pmode)
                         ? op0 : gen_reg_rtx (Pmode);
              op1 = machopic_indirect_data_reference (op1, temp);
              if (MACHOPIC_PURE)
                op1 = machopic_legitimize_pic_address (op1, mode,
                                                       temp == op1 ? 0 : temp);
            }
          if (op0 != op1 && GET_CODE (op0) != MEM)
            {
              rtx insn = gen_rtx_SET (op0, op1);
              emit_insn (insn);
              return;
            }
          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);
          else
            {
              rtx temp = op0;
              if (GET_CODE (temp) != REG)
                temp = gen_reg_rtx (Pmode);
              temp = legitimize_pic_address (op1, temp);
              if (temp == op0)
                return;
              op1 = temp;
            }
          /* dynamic-no-pic */
        }
      else
        {
          if (MEM_P (op0))
            op1 = force_reg (mode, op1);
          else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
            {
              rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
              op1 = legitimize_pic_address (op1, reg);
              if (op0 == op1)
                return;
              op1 = convert_to_mode (mode, op1, 1);
            }
        }
    }
  else
    {
      if (MEM_P (op0)
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && MEM_P (op1))
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
         to get them CSEed.  */
      if (can_create_pseudo_p ()
          && (mode == DImode) && TARGET_64BIT
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize)
        op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
          && CONST_DOUBLE_P (op1))
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          op1 = validize_mem (force_const_mem (mode, op1));
          if (!register_operand (op0, mode))
            {
              rtx temp = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (temp, op1));
              emit_move_insn (op0, temp);
              return;
            }
        }
    }

  emit_insn (gen_rtx_SET (op0, op1));
}
void
ix86_expand_vector_move (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
     psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
  unsigned int align = (TARGET_IAMCU
                        ? GET_MODE_BITSIZE (mode)
                        : GET_MODE_ALIGNMENT (mode));

  if (push_operand (op0, VOIDmode))
    op0 = emit_move_resolve_push (mode, op0);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && (CONSTANT_P (op1)
          || (SUBREG_P (op1)
              && CONSTANT_P (SUBREG_REG (op1))))
      && ((register_operand (op0, mode)
           && !standard_sse_constant_p (op1, mode))
          /* ix86_expand_vector_move_misalign() does not like constants.  */
          || (SSE_REG_MODE_P (mode)
              && MEM_P (op0)
              && MEM_ALIGN (op0) < align)))
    {
      if (SUBREG_P (op1))
        {
          machine_mode imode = GET_MODE (SUBREG_REG (op1));
          rtx r = force_const_mem (imode, SUBREG_REG (op1));
          if (r)
            r = validize_mem (r);
          else
            r = force_reg (imode, SUBREG_REG (op1));
          op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
        }
      else
        op1 = validize_mem (force_const_mem (mode, op1));
    }

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
          || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like both
         arguments in memory.  */
      if (!register_operand (op0, mode)
          && !register_operand (op1, mode))
        op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  machine_mode mode;

  if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
      || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
    {
      emit_insn (gen_rtx_SET (op0, op1));
      return;
    }

  rtx orig_op0 = NULL_RTX;
  mode = GET_MODE (op0);
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_VECTOR_INT:
    case MODE_INT:
      if (mode != V32QImode)
        {
          if (!MEM_P (op0))
            {
              orig_op0 = op0;
              op0 = gen_reg_rtx (V32QImode);
            }
          else
            op0 = gen_lowpart (V32QImode, op0);
          op1 = gen_lowpart (V32QImode, op1);
          mode = V32QImode;
        }
      break;
    case MODE_VECTOR_FLOAT:
      break;
    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    default:
      gcc_unreachable ();
    case E_V32QImode:
      extract = gen_avx_vextractf128v32qi;
      mode = V16QImode;
      break;
    case E_V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      mode = V4SFmode;
      break;
    case E_V4DFmode:
      extract = gen_avx_vextractf128v4df;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1))
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0))
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, copy_rtx (op1), const1_rtx));
    }
  else
    gcc_unreachable ();

  if (orig_op0)
    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
}
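/* Schematically, the split turns one unaligned 32-byte access into two
   16-byte halves, e.g. (register choices illustrative only):

        vmovups   (mem), %xmm0
        vinsertf128 $1, 16(mem), %ymm0, %ymm0    ; load case

        vmovups   %xmm0, (mem)
        vextractf128 $1, %ymm0, 16(mem)          ; store case

   which some microarchitectures execute faster than a single unaligned
   256-bit move.  */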
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         unpcklpd reg, reg
       }  */

void
ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  /* Use unaligned load/store for AVX512 or when optimizing for size.  */
  if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
    {
      emit_insn (gen_rtx_SET (op0, op1));
      return;
    }

  if (TARGET_AVX)
    {
      if (GET_MODE_SIZE (mode) == 32)
        ix86_avx256_split_vector_move_misalign (op0, op1);
      else
        /* Always use 128-bit mov<mode>_internal pattern for AVX.  */
        emit_insn (gen_rtx_SET (op0, op1));
      return;
    }

  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
    {
      emit_insn (gen_rtx_SET (op0, op1));
      return;
    }

  /* ??? If we have typed data, then it would appear that using
     movdqu is the only way to get unaligned data loaded with
     integer type.  */
  if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      emit_insn (gen_rtx_SET (op0, op1));
      return;
    }

  if (MEM_P (op1))
    {
      if (TARGET_SSE2 && mode == V2DFmode)
        {
          rtx zero;

          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
            {
              emit_clobber (op0);
              zero = op0;
            }
          else
            {
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);
            }

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));
        }
      else
        {
          rtx t;

          if (mode != V4SFmode)
            t = gen_reg_rtx (V4SFmode);
          else
            t = op0;

          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (t, CONST0_RTX (V4SFmode));
          else
            emit_clobber (t);

          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (t, t, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (t, t, m));
          if (mode != V4SFmode)
            emit_move_insn (op0, gen_lowpart (mode, t));
        }
    }
  else if (MEM_P (op0))
    {
      if (TARGET_SSE2 && mode == V2DFmode)
        {
          m = adjust_address (op0, DFmode, 0);
          emit_insn (gen_sse2_storelpd (m, op1));
          m = adjust_address (op0, DFmode, 8);
          emit_insn (gen_sse2_storehpd (m, op1));
        }
      else
        {
          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);

          m = adjust_address (op0, V2SFmode, 0);
          emit_insn (gen_sse_storelps (m, op1));
          m = adjust_address (op0, V2SFmode, 8);
          emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
        }
    }
  else
    gcc_unreachable ();
}
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
                             rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
                            rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      std::swap (src1, src2);
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
        {
          src2 = force_reg (mode, src2);
          src1 = src2;
        }
      else if (rtx_equal_p (dst, src1))
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));

  if (reload_completed
      && code == PLUS
      && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
   the given OPERANDS.  */

void
ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
                                     rtx operands[])
{
  rtx op1 = NULL_RTX, op2 = NULL_RTX;
  if (SUBREG_P (operands[1]))
    {
      op1 = operands[1];
      op2 = operands[2];
    }
  else if (SUBREG_P (operands[2]))
    {
      op1 = operands[2];
      op2 = operands[1];
    }
  /* Optimize (__m128i) d | (__m128i) e and similar code
     when d and e are float vectors into float vector logical
     insn.  In C/C++ without using intrinsics there is no other way
     to express vector logical operation on float vectors than
     to cast them temporarily to integer vectors.  */
  if (op1
      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
      && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
      && SUBREG_BYTE (op1) == 0
      && (GET_CODE (op2) == CONST_VECTOR
          || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
              && SUBREG_BYTE (op2) == 0))
      && can_create_pseudo_p ())
    {
      rtx dst;
      switch (GET_MODE (SUBREG_REG (op1)))
        {
        case E_V4SFmode:
        case E_V8SFmode:
        case E_V16SFmode:
        case E_V2DFmode:
        case E_V4DFmode:
        case E_V8DFmode:
          dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
          if (GET_CODE (op2) == CONST_VECTOR)
            {
              op2 = gen_lowpart (GET_MODE (dst), op2);
              op2 = force_reg (GET_MODE (dst), op2);
            }
          else
            {
              op1 = operands[1];
              op2 = SUBREG_REG (operands[2]);
              if (!vector_operand (op2, GET_MODE (dst)))
                op2 = force_reg (GET_MODE (dst), op2);
            }
          op1 = SUBREG_REG (op1);
          if (!vector_operand (op1, GET_MODE (dst)))
            op1 = force_reg (GET_MODE (dst), op1);
          emit_insn (gen_rtx_SET (dst,
                                  gen_rtx_fmt_ee (code, GET_MODE (dst),
                                                  op1, op2)));
          emit_move_insn (operands[0], gen_lowpart (mode, dst));
          return;
        default:
          break;
        }
    }
  if (!vector_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
  if (!vector_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  ix86_fixup_binary_operands_no_copy (code, mode, operands);
  emit_insn (gen_rtx_SET (operands[0],
                          gen_rtx_fmt_ee (code, mode, operands[1],
                                          operands[2])));
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
                         rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    std::swap (src1, src2);

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
            && (mode == HImode
                || mode == SImode
                || (TARGET_64BIT && mode == DImode))
            && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
                            rtx operands[])
{
  bool matching_memory = false;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = true;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));

  if (code == NOT)
    emit_insn (op);
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (machine_mode mode, rtx operands[],
                    bool signed_p)
{
  rtx_code_label *end_label, *qimode_label;
  rtx div, mod;
  rtx_insn *insn;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case E_SImode:
      if (GET_MODE (operands[0]) == SImode)
        {
          if (GET_MODE (operands[1]) == SImode)
            gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
          else
            gen_divmod4_1
              = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
          gen_zero_extend = gen_zero_extendqisi2;
        }
      else
        {
          gen_divmod4_1
            = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
          gen_zero_extend = gen_zero_extendqidi2;
        }
      gen_test_ccno_1 = gen_testsi_ccno_1;
      break;
    case E_DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
                                 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
                               gen_rtx_LABEL_REF (VOIDmode, qimode_label),
                               pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
                       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = lowpart_subreg (HImode, scratch, mode);
  tmp1 = lowpart_subreg (HImode, operands[2], mode);
  tmp2 = lowpart_subreg (QImode, operands[3], mode);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (mode, operands[2], operands[3]);
      mod = gen_rtx_MOD (mode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (mode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
    }
  if (mode == SImode)
    {
      if (GET_MODE (operands[0]) != SImode)
        div = gen_rtx_ZERO_EXTEND (DImode, div);
      if (GET_MODE (operands[1]) != SImode)
        mod = gen_rtx_ZERO_EXTEND (DImode, mod);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
                               tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
         of 8bit divide.  */
      scratch = gen_reg_rtx (GET_MODE (operands[1]));
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
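/* Schematically (registers chosen by the allocator; shown for
   orientation only), a 32-bit unsigned division split this way becomes

        movl    %esi, %ecx
        orl     %edi, %ecx
        testl   $-256, %ecx    ; are both operands in [0-255]?
        je      .Lqimode
        ...                    ; full 32-bit div/idiv path
        jmp     .Lend
   .Lqimode:
        divb    %dil           ; AL = quotient, AH = remainder
        ...                    ; zero-extend AL, extract AH
   .Lend:                                                            */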
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
{
  df_ref def, use;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  FOR_EACH_INSN_USE (use, next)
    FOR_EACH_INSN_DEF (def, prev)
      if (!DF_REF_IS_ARTIFICIAL (def)
          && DF_REF_REGNO (use) == DF_REF_REGNO (def))
        return distance + (distance & 1) + 2;

  return distance + 1;
}
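/* Example of the accounting above: with no dependency between PREV and
   NEXT the distance grows by one half-cycle; with a true register
   dependency it is first rounded up to a whole cycle
   (distance + (distance & 1)) and then advanced by a full cycle
   (+ 2 half-cycles), so e.g. a distance of 3 becomes 6.  */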
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
                  rtx_insn *insn)
{
  df_ref def;

  FOR_EACH_INSN_DEF (def, insn)
    if (DF_REF_REG_DEF_P (def)
        && !DF_REF_IS_ARTIFICIAL (def)
        && (regno1 == DF_REF_REGNO (def)
            || regno2 == DF_REF_REGNO (def)))
      return true;

  return false;
}

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref use;

  FOR_EACH_INSN_USE (use, insn)
    if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
                               rtx_insn *insn, int distance,
                               rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  while (prev
         && prev != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_defines_reg (regno1, regno2, prev))
            {
              if (recog_memoized (prev) < 0
                  || get_attr_type (prev) != TYPE_LEA)
                {
                  *found = true;
                  return distance;
                }
            }

          next = prev;
        }
      if (prev == BB_HEAD (bb))
        break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbor BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
                         rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
                                              distance, PREV_INSN (insn),
                                              &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->src == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_non_agu_define_in_bb (regno1, regno2,
                                                  insn, distance,
                                                  BB_END (bb), &found);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              int bb_dist
                = distance_non_agu_define_in_bb (regno1, regno2,
                                                 insn, distance,
                                                 BB_END (e->src),
                                                 &found_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
                        rtx_insn *insn, int distance, rtx_insn *start,
                        bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
        /* If insn and start belong to the same bb, set prev to insn,
           so the call to increase_distance will increase the distance
           between insns by 1.  */
        prev = insn;
    }

  while (next
         && next != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
        {
          distance = increase_distance(prev, next, distance);
          if (insn_uses_reg_mem (regno, next))
            {
              /* Return DISTANCE if OP0 is used in memory
                 address in NEXT.  */
              *found = true;
              return distance;
            }

          if (insn_defines_reg (regno, INVALID_REGNUM, next))
            {
              /* Return -1 if OP0 is set in NEXT.  */
              *redefined = true;
              return -1;
            }

          prev = next;
        }

      if (next == BB_END (bb))
        break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
                                       NEXT_INSN (insn),
                                       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_agu_use_in_bb (regno0, insn,
                                           distance, BB_HEAD (bb),
                                           &found, &redefined);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;
          bool redefined_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              int bb_dist
                = distance_agu_use_in_bb (regno0, insn,
                                          distance, BB_HEAD (e->dest),
                                          &found_in_bb, &redefined_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
                      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Silvermont if using a 2-source or 3-source LEA for
     non-destructive destination purposes, or due to wanting
     ability to use SCALE, the use of LEA is justified.  */
  if (TARGET_SILVERMONT || TARGET_INTEL)
    {
      if (has_scale)
        return true;
      if (split_cost < 1)
        return false;
      if (regno0 == regno1 || regno0 == regno2)
        return false;
      return true;
    }

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
         operand usage and split cost is 0 then both lea
         and non lea variants have same priority.  Currently
         we prefer lea for 64 bit code and non lea on 32 bit
         code.  */
      if (dist_use < 0 && split_cost == 0)
        return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
        return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
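/* Example of the resulting policy (numbers purely illustrative): for
   "lea 4(%rbx,%rcx), %rax" with a non-AGU producer of %rbx one
   half-cycle back (dist_define == 1) and a consumer of %rax in an
   address right after (dist_use == 1), a split_cost of 1 makes
   dist_define + split_cost >= dist_use hold, so the lea is kept.  */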
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
        {
          FOR_EACH_INSN_USE (use, insn)
            if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
              return false;

          if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
            return true;
        }

      if (insn == BB_END (bb))
        break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
          && REG_P (XEXP (operands[1], 0))))
    return false;

  /* Check if it is OK to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
         destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
        split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
        split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
        {
          if (regno0 != regno1)
            split_cost += 1;
          else if (regno2 == regno0)
            split_cost += 4;
          else
            split_cost += parts.scale;
        }

      /* Have to use add instruction with immediate if
         disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
        split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
                                parts.scale > 1);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, machine_mode mode,
                 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Return true if regno1 def is nearest to the insn.  */

static bool
find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
{
  rtx_insn *prev = insn;
  rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));

  if (insn == start)
    return false;
  while (prev && prev != start)
    {
      if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
        {
          prev = PREV_INSN (prev);
          continue;
        }
      if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
        return true;
      else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
        return false;
      prev = PREV_INSN (prev);
    }

  /* None of the regs is defined in the bb.  */
  return false;
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
{
  unsigned int regno0, regno1, regno2;
  struct ix86_address parts;
  rtx target, tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  target = gen_lowpart (mode, operands[0]);

  regno0 = true_regnum (target);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    {
      parts.base = gen_lowpart (mode, parts.base);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      parts.index = gen_lowpart (mode, parts.index);
      regno2 = true_regnum (parts.index);
    }

  if (parts.disp)
    parts.disp = gen_lowpart (mode, parts.disp);

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
        {
          /* If we have a case r1 = r1 + C * r2 then we
             should use multiplication which is very
             expensive.  Assume cost model is wrong if we
             have such case here.  */
          gcc_assert (regno2 != regno0);

          for (adds = parts.scale; adds > 0; adds--)
            ix86_emit_binop (PLUS, mode, target, parts.index);
        }
      else
        {
          /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (target, parts.index));

          /* Use shift for scaling.  */
          ix86_emit_binop (ASHIFT, mode, target,
                           GEN_INT (exact_log2 (parts.scale)));

          if (parts.base)
            ix86_emit_binop (PLUS, mode, target, parts.base);

          if (parts.disp && parts.disp != const0_rtx)
            ix86_emit_binop (PLUS, mode, target, parts.disp);
        }
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (target, parts.disp));
    }
  else
    {
      if (!parts.base)
        {
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (target, parts.index));
        }
      else if (!parts.index)
        {
          if (regno0 != regno1)
            emit_insn (gen_rtx_SET (target, parts.base));
        }
      else
        {
          if (regno0 == regno1)
            tmp = parts.index;
          else if (regno0 == regno2)
            tmp = parts.base;
          else
            {
              rtx tmp1;

              /* Find better operand for SET instruction, depending
                 on which definition is farther from the insn.  */
              if (find_nearest_reg_def (insn, regno1, regno2))
                tmp = parts.index, tmp1 = parts.base;
              else
                tmp = parts.base, tmp1 = parts.index;

              emit_insn (gen_rtx_SET (target, tmp));

              if (parts.disp && parts.disp != const0_rtx)
                ix86_emit_binop (PLUS, mode, target, parts.disp);

              ix86_emit_binop (PLUS, mode, target, tmp1);
              return;
            }

          ix86_emit_binop (PLUS, mode, target, tmp);
        }

      if (parts.disp && parts.disp != const0_rtx)
        ix86_emit_binop (PLUS, mode, target, parts.disp);
    }
}
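/* For illustration, "leal 8(%ebx,%ecx,4), %eax" may be split by the
   code above into

        movl    %ecx, %eax     ; move index into the destination
        sall    $2, %eax       ; scale by shift
        addl    %ebx, %eax     ; add base
        addl    $8, %eax       ; add displacement

   all of which execute on the ALU rather than the AGU; the register
   names are illustrative only.  */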
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like BONNELL, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
        return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
                                          use_body))
          return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (set_body,
                                          XVECEXP (use_body, 0, i)))
          return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
          || GET_CODE (shift_rtx) == LSHIFTRT
          || GET_CODE (shift_rtx) == ASHIFTRT
          || GET_CODE (shift_rtx) == ROTATE
          || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
        {
          /* Add check since it can be invoked before register
             allocation in pre-reload schedule.  */
          if (reload_completed
              && true_regnum (set_dest) == true_regnum (shift_count))
            return true;
          else if (REGNO(set_dest) == REGNO(shift_count))
            return true;
        }
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
                                       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code,
                        machine_mode,
                        rtx operands[2])
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}

/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
        emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
        emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
        emit_insn (gen_sse_movss (value, value, input));
      else
        emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES_TO_VEC)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
                            gen_rtvec (4, GEN_INT (0x43300000UL),
                                       GEN_INT (0x45300000UL),
                                       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
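/* Worked example of the bias trick: for input 2**33 + 5 the low word is
   5 and the high word is 2.  Pasting in the exponents gives the doubles
   0x1.0p52 + 5 and 0x1.0p84 + 2 * 0x1.0p32; subtracting the biases
   leaves 5.0 and 2**33, and the final add recovers
   (double) (2**33 + 5) exactly.  */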
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx, rtx)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
                           NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
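/* E.g. for input 0x80000005 (2**31 + 5): the wrapping signed add of
   -2**31 yields 5, floatsidf produces 5.0, and adding 2**31.0 back
   gives 2147483653.0, the correct unsigned interpretation.  */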
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
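/* The 16/16 split sidesteps the signedness problem near 2**31: e.g.
   0x87654321 is handled as hi = 0x8765, lo = 0x4321, so the value is
   rebuilt as 0x8765 * 65536.0 + 0x4321 = 2271560481.0 before the final
   SFmode rounding, instead of misusing a signed 32-bit convert.  */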
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  machine_mode intmode = GET_MODE (val);
  machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
                                NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
                                OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  machine_mode mode = GET_MODE (val);
  machine_mode scalarmode = GET_MODE_INNER (mode);
  machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
                                0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
                                 gen_lowpart (intmode, tmp[0]),
                                 GEN_INT (31), NULL_RTX, 0,
                                 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
                                   gen_lowpart (intmode, tmp[0]),
                                   two31, NULL_RTX, 0,
                                   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
                              0, OPTAB_DIRECT);
}
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  switch (mode)
    {
    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_TImode:
    case E_TFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
                           GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
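
/* Illustration (not from the original sources): for V2DFmode the mask
   built above is { 0x8000000000000000, 0x8000000000000000 } viewed as
   two doubles, i.e. per 64-bit element

     uint64_t sign = 1ull << 63;   // wi::set_bit_in_zero (63, 64)
     uint64_t not_sign = ~sign;    // the INVERT case, wi::bit_not

   so ANDing with the inverted mask clears the sign (fabs) and XORing
   with the plain mask flips it (fneg).  */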
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
        par = gen_rtvec (2, set, use);
      else
        {
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          par = gen_rtvec (3, set, use, clob);
        }
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (CONST_DOUBLE_P (op0))
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
        {
          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }
        }
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_const;
      else
        copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_var;
      else
        copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
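
/* For exposition only: the mask-based expansion implements the classic
   bit-twiddling form of copysign.  A plain-C model (hypothetical helper,
   assuming IEEE doubles and <stdint.h>/<string.h>):

     double copysign_bits (double x, double y)
     {
       uint64_t xb, yb, sign = 1ull << 63;
       memcpy (&xb, &x, 8);
       memcpy (&yb, &y, 8);
       xb = (xb & ~sign) | (yb & sign);   // magnitude of x, sign of y
       memcpy (&x, &xb, 8);
       return x;
     }
*/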
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = lowpart_subreg (vmode, dest, mode);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))    /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (scratch, x));

      dest = mask;
      op0 = lowpart_subreg (vmode, op0, mode);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))              /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                             /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = lowpart_subreg (vmode, op1, mode);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (scratch, x));

      if (REGNO (op0) == REGNO (dest))                 /* alternative 1,2 */
        {
          dest = lowpart_subreg (vmode, op0, mode);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                             /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = lowpart_subreg (vmode, op0, mode);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (dest, x));
}
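
/* Note (exposition, not original text): with MASK selecting the sign bit
   and NMASK = ~MASK, every register-allocation alternative above computes
   the same dataflow, only with different register assignments:

     scratch = op1 & MASK;      // sign bit of op1
     dest    = op0 & NMASK;     // magnitude of op0
     dest    = dest | scratch;  // combined copysign result
*/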
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case E_CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    default:
      if (set_mode != req_mode)
        return false;
      break;
    }

  gcc_assert (GET_MODE_CLASS (set_mode) == MODE_CC);

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
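
/* Example (exposition only): for ix86_expand_int_compare (EQ, a, b) the
   emitted RTL is roughly

     (set (reg:CCZ FLAGS_REG) (compare:CCZ (reg a) (reg b)))

   and the returned rtx, to be placed inside the flags user, is

     (eq (reg:CCZ FLAGS_REG) (const_int 0))
*/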
/* Figure out whether to use unordered fp comparisons.  */

static bool
ix86_unordered_fp_compare (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return false;

  switch (code)
    {
    case GT:
    case GE:
    case LT:
    case LE:
      return false;

    case EQ:
    case NE:

    case LTGT:
    case UNORDERED:
    case ORDERED:
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case UNEQ:
      return true;

    default:
      gcc_unreachable ();
    }
}
machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case LTU:                   /* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
          && (rtx_equal_p (op1, XEXP (op0, 0))
              || rtx_equal_p (op1, XEXP (op0, 1))))
        return CCCmode;
      return CCmode;
    case GTU:                   /* CF=0 & ZF=0 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases the carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
         for the proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
           && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
        {
        default:
          gcc_unreachable ();

        case E_CCmode:
        case E_CCGCmode:
        case E_CCGOCmode:
        case E_CCNOmode:
        case E_CCAmode:
        case E_CCCmode:
        case E_CCOmode:
        case E_CCPmode:
        case E_CCSmode:
        case E_CCZmode:
          return CCmode;
        }

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:                    /* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:                  /* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
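
/* For reference (exposition only): the three strategies correspond
   roughly to these instruction sequences,

     IX86_FPCMP_COMI:   fcomi              ; sets ZF/PF/CF directly
     IX86_FPCMP_SAHF:   fcom; fnstsw %ax; sahf
     IX86_FPCMP_ARITH:  fcom; fnstsw %ax; test/and on %ah

   which is where the 2 vs. 3 vs. 5+ byte size comparison in the comment
   above comes from.  */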
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  bool unordered_compare = ix86_unordered_fp_compare (code);
  rtx op0 = *pop0, op1 = *pop1;
  machine_mode op_mode = GET_MODE (op0);
  bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (unordered_compare
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (MEM_P (op0)
              && ! (standard_80387_constant_p (op1) == 0
                    || MEM_P (op1))))
        {
          enum rtx_code new_code = ix86_fp_swap_condition (code);
          if (new_code != UNKNOWN)
            {
              std::swap (op0, op1);
              code = new_code;
            }
        }

      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  bool unordered_compare = ix86_unordered_fp_compare (code);
  machine_mode intcmp_mode;
  rtx tmp, tmp2;

  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = CCFPmode;
      tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
      if (unordered_compare)
        tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
      emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = CCFPmode;
      tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
      if (unordered_compare)
        tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
      tmp = gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
      if (unordered_compare)
        tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
      tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (scratch, tmp));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
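
/* Reference (exposition only): the magic constants above test the x87
   condition bits as they land in %ah after fnstsw:

     C0 -> 0x01   (carry-like result)
     C2 -> 0x04   (unordered / NaN)
     C3 -> 0x40   (zero / equal)

   so e.g. GEN_INT (0x45) masks C3|C2|C0, the complete comparison
   outcome.  */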
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  machine_mode mode = GET_MODE (op0);
  rtx tmp;

  /* Handle special case - vector comparison with boolean result, transform
     it using ptest instruction.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
      machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;

      gcc_assert (code == EQ || code == NE);
      /* Generate XOR since we can't check that one operand is zero vector.  */
      tmp = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
      tmp = gen_lowpart (p_mode, tmp);
      emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
                              gen_rtx_UNSPEC (CCmode,
                                              gen_rtvec (2, tmp, tmp),
                                              UNSPEC_PTEST)));
      tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
      return;
    }

  switch (mode)
    {
    case E_SFmode:
    case E_DFmode:
    case E_XFmode:
    case E_QImode:
    case E_HImode:
    case E_SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
      return;

    case E_DImode:
      if (TARGET_64BIT)
        goto simple;
      /* For 32-bit target DI comparison may be performed on
         SSE registers.  To allow this we should avoid split
         to SI mode which is achieved by doing xor in DI mode
         and then comparing with zero (which is recognized by
         STV pass).  We don't compare using xor when optimizing
         for size.  */
      if (!optimize_insn_for_size_p ()
          && TARGET_STV
          && (code == EQ || code == NE))
        {
          op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
          op1 = const0_rtx;
        }
      /* FALLTHRU */
    case E_TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2];
        rtx_code_label *label2;
        enum rtx_code code1, code2, code3;
        machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            std::swap (op0, op1);
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Emulate comparisons that do not depend on Zero flag with
           double-word subtraction.  Note that only Overflow, Sign
           and Carry flags are valid, so swap arguments and condition
           of comparisons that would otherwise test Zero flag.  */

        switch (code)
          {
          case LE: case LEU: case GT: case GTU:
            std::swap (lo[0], lo[1]);
            std::swap (hi[0], hi[1]);
            code = swap_condition (code);
            /* FALLTHRU */

          case LT: case LTU: case GE: case GEU:
            {
              rtx (*cmp_insn) (rtx, rtx);
              rtx (*sbb_insn) (rtx, rtx, rtx);
              bool uns = (code == LTU || code == GEU);

              if (TARGET_64BIT)
                {
                  cmp_insn = gen_cmpdi_1;
                  sbb_insn
                    = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
                }
              else
                {
                  cmp_insn = gen_cmpsi_1;
                  sbb_insn
                    = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
                }

              if (!nonimmediate_operand (lo[0], submode))
                lo[0] = force_reg (submode, lo[0]);
              if (!x86_64_general_operand (lo[1], submode))
                lo[1] = force_reg (submode, lo[1]);

              if (!register_operand (hi[0], submode))
                hi[0] = force_reg (submode, hi[0]);
              if ((uns && !nonimmediate_operand (hi[1], submode))
                  || (!uns && !x86_64_general_operand (hi[1], submode)))
                hi[1] = force_reg (submode, hi[1]);

              emit_insn (cmp_insn (lo[0], lo[1]));
              emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));

              tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);

              ix86_expand_branch (code, tmp, const0_rtx, label);
              return;
            }

          default:
            break;
          }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
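
/* Example (exposition only): on a 32-bit target a DImode "a == b"
   branch taken through the xor/ior path above reduces to one jump:

     xorl  lo(b), lo(a)    ; lo0 ^ lo1
     xorl  hi(b), hi(a)    ; hi0 ^ hi1
     orl   hi(a), lo(a)    ; zero iff both halves are equal
     je    label
*/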
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */

static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op;
      rtx_insn *compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          std::swap (op0, op1);
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with
         carry flag based comparison.  This fails to be true only when
         we decide to expand comparison using arithmetic, which is not
         a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          std::swap (op0, op1);
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
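
/* Example (exposition only): the EQ/NE rewrite above turns "a == 0" into
   the pure carry-flag test "(unsigned) a < 1", i.e.

     cmpl  $1, %eax     ; CF = (a < 1 unsigned) = (a == 0)
     sbbl  %edx, %edx   ; %edx = a == 0 ? -1 : 0

   which is what lets callers build branch-free sbb sequences without a
   setcc.  */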
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx_insn *compare_seq;
  rtx compare_op;
  machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
          && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than by using sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  std::swap (ct, cf);
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  std::swap (ct, cf);
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp,
                                            copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }

      if (diff < 0)
        {
          machine_mode cmp_mode = GET_MODE (op0);
          enum rtx_code new_code;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              new_code = reverse_condition_maybe_unordered (code);
            }
          else
            new_code = ix86_reverse_condition (code, cmp_mode);
          if (new_code != UNKNOWN)
            {
              std::swap (ct, cf);
              diff = -diff;
              code = new_code;
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1      (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              machine_mode cmp_mode = GET_MODE (op0);
              enum rtx_code new_code;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing unordered compare to normal compare,
                     that is not valid in general (we may convert non-trapping
                     condition to trapping one), however on i386 we currently
                     emit all comparisons unordered.  */
                  new_code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  new_code = ix86_reverse_condition (code, cmp_mode);
                  if (compare_code != UNKNOWN && new_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }

              if (new_code != UNKNOWN)
                {
                  cf = ct;
                  ct = 0;
                  code = new_code;
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                std::swap (ct, cf);

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                       GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (!ix86_expand_int_movcc (operands))
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}
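
/* Worked example (exposition only): for "dest = a >= b (unsigned) ? ct
   : cf" with constant ct/cf, the sbb-based path above emits roughly

     cmpl  %ebx, %eax     ; CF = (a < b unsigned)
     sbbl  %edx, %edx     ; %edx = -CF   (all-ones or zero)
     andl  $(cf - ct), %edx
     addl  $ct, %edx      ; %edx = CF ? cf : ct

   i.e. a branch-free select at the cost of a couple of ALU ops.  */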
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
        break;
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
        break;
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
         ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
         comparison operands to transform into something that is
         supported.  */
      std::swap (*pop0, *pop1);
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    std::swap (if_true, if_false);
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || flag_signed_zeros)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (dest, tmp));
  return true;
}
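
/* Note (exposition only): the operand-order restriction is what makes
   this IEEE safe.  minss/minsd implement

     dst = (a < b) ? a : b;   // second operand returned on NaN

   so min (x, y) and min (y, x) can differ for NaN and for -0.0/+0.0
   inputs; that is why only the LT/UNGE shapes are matched and the
   operands are never interchanged.  */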
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  machine_mode mode = GET_MODE (dest);
  machine_mode cmp_ops_mode = GET_MODE (cmp_op0);

  /* In general case result of comparison can differ from operands' type.  */
  machine_mode cmp_mode;

  /* In AVX512F the result of comparison is an integer mask.  */
  bool maskcmp = false;
  rtx x;

  if (GET_MODE_SIZE (cmp_ops_mode) == 64)
    {
      unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
      cmp_mode = int_mode_for_size (nbits, 0).require ();
      maskcmp = true;
    }
  else
    cmp_mode = cmp_ops_mode;

  cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
    cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);

  if (optimize
      || (maskcmp && cmp_mode != mode)
      || (op_true && reg_overlap_mentioned_p (dest, op_true))
      || (op_false && reg_overlap_mentioned_p (dest, op_false)))
    dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);

  /* Compare patterns for int modes are unspec in AVX512F only.  */
  if (maskcmp && (code == GT || code == EQ))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (cmp_ops_mode)
        {
        case E_V64QImode:
          gcc_assert (TARGET_AVX512BW);
          gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
          break;
        case E_V32HImode:
          gcc_assert (TARGET_AVX512BW);
          gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
          break;
        case E_V16SImode:
          gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
          break;
        case E_V8DImode:
          gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
          break;
        default:
          gen = NULL;
        }

      if (gen)
        {
          emit_insn (gen (dest, cmp_op0, cmp_op1));
          return dest;
        }
    }
  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);

  if (cmp_mode != mode && !maskcmp)
    {
      x = force_reg (cmp_ops_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  machine_mode mode = GET_MODE (dest);
  machine_mode cmpmode = GET_MODE (cmp);

  /* In AVX512F the result of comparison is an integer mask.  */
  bool maskcmp = (mode != cmpmode && TARGET_AVX512F);

  rtx t2, t3, x;

  /* If we have an integer mask and FP value then we need
     to cast mask to FP mode.  */
  if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
    {
      cmp = force_reg (cmpmode, cmp);
      cmp = gen_rtx_SUBREG (mode, cmp, 0);
    }

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode))
      && !maskcmp)
    {
      emit_insn (gen_rtx_SET (dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode)
           && !maskcmp)
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (dest, x));
    }
  else if (op_true == CONST0_RTX (mode)
           && !maskcmp)
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
           && !maskcmp)
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (dest, x));
    }
  else if (TARGET_XOP
           && !maskcmp)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                          op_true,
                                                          op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
      rtx d = dest;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case E_V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case E_V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case E_V16QImode:
        case E_V8HImode:
        case E_V4SImode:
        case E_V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              if (mode != V16QImode)
                d = gen_reg_rtx (V16QImode);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case E_V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case E_V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case E_V32QImode:
        case E_V16HImode:
        case E_V8SImode:
        case E_V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              if (mode != V32QImode)
                d = gen_reg_rtx (V32QImode);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;

        case E_V64QImode:
          gen = gen_avx512bw_blendmv64qi;
          break;
        case E_V32HImode:
          gen = gen_avx512bw_blendmv32hi;
          break;
        case E_V16SImode:
          gen = gen_avx512f_blendmv16si;
          break;
        case E_V8DImode:
          gen = gen_avx512f_blendmv8di;
          break;
        case E_V8DFmode:
          gen = gen_avx512f_blendmv8df;
          break;
        case E_V16SFmode:
          gen = gen_avx512f_blendmv16sf;
          break;

        default:
          break;
        }

      if (gen != NULL)
        {
          emit_insn (gen (d, op_false, op_true, cmp));
          if (d != dest)
            emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
        }
      else
        {
          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (dest, x));
        }
    }
}
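
/* Illustration (exposition only): when no blend instruction applies, the
   fallback at the end computes the standard mask-select idiom, per
   element:

     t2   = cmp & op_true;      // lanes where the predicate holds
     t3   = ~cmp & op_false;    // lanes where it does not
     dest = t3 | t2;

   which on SSE maps onto pand/pandn/por (or andps/andnps/orps).  */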
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
          && !TARGET_64BIT))
    return false;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes.  */

static int
ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
{
  switch (code)
    {
    case EQ:
      return 0;
    case LT:
    case LTU:
      return 1;
    case LE:
    case LEU:
      return 2;
    case NE:
      return 4;
    case GE:
    case GEU:
      return 5;
    case GT:
    case GTU:
      return 6;
    default:
      gcc_unreachable ();
    }
}

/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes.  */

static int
ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
{
  switch (code)
    {
    case EQ:
      return 0x00;
    case NE:
      return 0x04;
    case GT:
      return 0x0e;
    case LE:
      return 0x02;
    case GE:
      return 0x0d;
    case LT:
      return 0x01;
    case UNLE:
      return 0x0a;
    case UNLT:
      return 0x09;
    case UNGE:
      return 0x05;
    case UNGT:
      return 0x06;
    case UNEQ:
      return 0x08;
    case LTGT:
      return 0x0c;
    case ORDERED:
      return 0x07;
    case UNORDERED:
      return 0x03;
    default:
      gcc_unreachable ();
    }
}
23793 /* Return immediate value to be used in UNSPEC_PCMP
23794 for comparison CODE in MODE. */
23797 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code
, machine_mode mode
)
23799 if (FLOAT_MODE_P (mode
))
23800 return ix86_fp_cmp_code_to_pcmp_immediate (code
);
23801 return ix86_int_cmp_code_to_pcmp_immediate (code
);
23804 /* Expand AVX-512 vector comparison. */
23807 ix86_expand_mask_vec_cmp (rtx operands
[])
23809 machine_mode mask_mode
= GET_MODE (operands
[0]);
23810 machine_mode cmp_mode
= GET_MODE (operands
[2]);
23811 enum rtx_code code
= GET_CODE (operands
[1]);
23812 rtx imm
= GEN_INT (ix86_cmp_code_to_pcmp_immediate (code
, cmp_mode
));
23822 unspec_code
= UNSPEC_UNSIGNED_PCMP
;
23826 unspec_code
= UNSPEC_PCMP
;
23829 unspec
= gen_rtx_UNSPEC (mask_mode
, gen_rtvec (3, operands
[2],
23832 emit_insn (gen_rtx_SET (operands
[0], unspec
));
23837 /* Expand fp vector comparison. */
23840 ix86_expand_fp_vec_cmp (rtx operands
[])
23842 enum rtx_code code
= GET_CODE (operands
[1]);
23845 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
23846 &operands
[2], &operands
[3]);
23847 if (code
== UNKNOWN
)
23850 switch (GET_CODE (operands
[1]))
23853 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[2],
23854 operands
[3], NULL
, NULL
);
23855 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[2],
23856 operands
[3], NULL
, NULL
);
23860 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[2],
23861 operands
[3], NULL
, NULL
);
23862 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[2],
23863 operands
[3], NULL
, NULL
);
23867 gcc_unreachable ();
23869 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
23873 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[2], operands
[3],
23874 operands
[1], operands
[2]);
23876 if (operands
[0] != cmp
)
23877 emit_move_insn (operands
[0], cmp
);
23883 ix86_expand_int_sse_cmp (rtx dest
, enum rtx_code code
, rtx cop0
, rtx cop1
,
23884 rtx op_true
, rtx op_false
, bool *negate
)
23886 machine_mode data_mode
= GET_MODE (dest
);
23887 machine_mode mode
= GET_MODE (cop0
);
23892 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23894 && (mode
== V16QImode
|| mode
== V8HImode
23895 || mode
== V4SImode
|| mode
== V2DImode
))
23899 /* Canonicalize the comparison to EQ, GT, GTU. */
23910 code
= reverse_condition (code
);
23916 code
= reverse_condition (code
);
23922 std::swap (cop0
, cop1
);
23923 code
= swap_condition (code
);
23927 gcc_unreachable ();
23930 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23931 if (mode
== V2DImode
)
23936 /* SSE4.1 supports EQ. */
23937 if (!TARGET_SSE4_1
)
23943 /* SSE4.2 supports GT/GTU. */
23944 if (!TARGET_SSE4_2
)
23949 gcc_unreachable ();
23953 /* Unsigned parallel compare is not supported by the hardware.
23954 Play some tricks to turn this into a signed comparison
23958 cop0
= force_reg (mode
, cop0
);
23970 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
23974 case E_V16SImode
: gen_sub3
= gen_subv16si3
; break;
23975 case E_V8DImode
: gen_sub3
= gen_subv8di3
; break;
23976 case E_V8SImode
: gen_sub3
= gen_subv8si3
; break;
23977 case E_V4DImode
: gen_sub3
= gen_subv4di3
; break;
23978 case E_V4SImode
: gen_sub3
= gen_subv4si3
; break;
23979 case E_V2DImode
: gen_sub3
= gen_subv2di3
; break;
23981 gcc_unreachable ();
23983 /* Subtract (-(INT MAX) - 1) from both operands to make
23985 mask
= ix86_build_signbit_mask (mode
, true, false);
23986 t1
= gen_reg_rtx (mode
);
23987 emit_insn (gen_sub3 (t1
, cop0
, mask
));
23989 t2
= gen_reg_rtx (mode
);
23990 emit_insn (gen_sub3 (t2
, cop1
, mask
));
24004 /* Perform a parallel unsigned saturating subtraction. */
24005 x
= gen_reg_rtx (mode
);
24006 emit_insn (gen_rtx_SET (x
, gen_rtx_US_MINUS (mode
, cop0
,
24010 cop1
= CONST0_RTX (mode
);
24012 *negate
= !*negate
;
24016 gcc_unreachable ();
24022 std::swap (op_true
, op_false
);
24024 /* Allow the comparison to be done in one mode, but the movcc to
24025 happen in another mode. */
24026 if (data_mode
== mode
)
24028 x
= ix86_expand_sse_cmp (dest
, code
, cop0
, cop1
,
24029 op_true
, op_false
);
24033 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
24034 x
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), code
, cop0
, cop1
,
24035 op_true
, op_false
);
24036 if (GET_MODE (x
) == mode
)
24037 x
= gen_lowpart (data_mode
, x
);
24043 /* Expand integer vector comparison. */
24046 ix86_expand_int_vec_cmp (rtx operands
[])
24048 rtx_code code
= GET_CODE (operands
[1]);
24049 bool negate
= false;
24050 rtx cmp
= ix86_expand_int_sse_cmp (operands
[0], code
, operands
[2],
24051 operands
[3], NULL
, NULL
, &negate
);
24057 cmp
= ix86_expand_int_sse_cmp (operands
[0], EQ
, cmp
,
24058 CONST0_RTX (GET_MODE (cmp
)),
24059 NULL
, NULL
, &negate
);
24061 gcc_assert (!negate
);
24063 if (operands
[0] != cmp
)
24064 emit_move_insn (operands
[0], cmp
);
24069 /* Expand a floating-point vector conditional move; a vcond operation
24070 rather than a movcc operation. */
24073 ix86_expand_fp_vcond (rtx operands
[])
24075 enum rtx_code code
= GET_CODE (operands
[3]);
24078 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
24079 &operands
[4], &operands
[5]);
24080 if (code
== UNKNOWN
)
24083 switch (GET_CODE (operands
[3]))
24086 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
24087 operands
[5], operands
[0], operands
[0]);
24088 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
24089 operands
[5], operands
[1], operands
[2]);
24093 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
24094 operands
[5], operands
[0], operands
[0]);
24095 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
24096 operands
[5], operands
[1], operands
[2]);
24100 gcc_unreachable ();
24102 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
24104 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
24108 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
24109 operands
[5], operands
[1], operands
[2]))
24112 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
24113 operands
[1], operands
[2]);
24114 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
24118 /* Expand a signed/unsigned integral vector conditional move. */
24121 ix86_expand_int_vcond (rtx operands
[])
24123 machine_mode data_mode
= GET_MODE (operands
[0]);
24124 machine_mode mode
= GET_MODE (operands
[4]);
24125 enum rtx_code code
= GET_CODE (operands
[3]);
24126 bool negate
= false;
24129 cop0
= operands
[4];
24130 cop1
= operands
[5];
24132 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
24133 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
24134 if ((code
== LT
|| code
== GE
)
24135 && data_mode
== mode
24136 && cop1
== CONST0_RTX (mode
)
24137 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
24138 && GET_MODE_UNIT_SIZE (data_mode
) > 1
24139 && GET_MODE_UNIT_SIZE (data_mode
) <= 8
24140 && (GET_MODE_SIZE (data_mode
) == 16
24141 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
24143 rtx negop
= operands
[2 - (code
== LT
)];
24144 int shift
= GET_MODE_UNIT_BITSIZE (data_mode
) - 1;
24145 if (negop
== CONST1_RTX (data_mode
))
24147 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
24148 operands
[0], 1, OPTAB_DIRECT
);
24149 if (res
!= operands
[0])
24150 emit_move_insn (operands
[0], res
);
24153 else if (GET_MODE_INNER (data_mode
) != DImode
24154 && vector_all_ones_operand (negop
, data_mode
))
24156 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
24157 operands
[0], 0, OPTAB_DIRECT
);
24158 if (res
!= operands
[0])
24159 emit_move_insn (operands
[0], res
);
24164 if (!nonimmediate_operand (cop1
, mode
))
24165 cop1
= force_reg (mode
, cop1
);
24166 if (!general_operand (operands
[1], data_mode
))
24167 operands
[1] = force_reg (data_mode
, operands
[1]);
24168 if (!general_operand (operands
[2], data_mode
))
24169 operands
[2] = force_reg (data_mode
, operands
[2]);
24171 x
= ix86_expand_int_sse_cmp (operands
[0], code
, cop0
, cop1
,
24172 operands
[1], operands
[2], &negate
);
24177 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
24178 operands
[2-negate
]);
24182 /* AVX512F does support 64-byte integer vector operations,
24183 thus the longest vector we are faced with is V64QImode. */
24184 #define MAX_VECT_LEN 64
24186 struct expand_vec_perm_d
24188 rtx target
, op0
, op1
;
24189 unsigned char perm
[MAX_VECT_LEN
];
24190 machine_mode vmode
;
24191 unsigned char nelt
;
24192 bool one_operand_p
;

static bool
ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
                              struct expand_vec_perm_d *d)
{
  /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
     expander, so args are either in d, or in op0, op1 etc.  */
  machine_mode mode = GET_MODE (d ? d->op0 : op0);
  machine_mode maskmode = mode;
  rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

  switch (mode)
    {
    case E_V8HImode:
      if (TARGET_AVX512VL && TARGET_AVX512BW)
        gen = gen_avx512vl_vpermt2varv8hi3;
      break;
    case E_V16HImode:
      if (TARGET_AVX512VL && TARGET_AVX512BW)
        gen = gen_avx512vl_vpermt2varv16hi3;
      break;
    case E_V64QImode:
      if (TARGET_AVX512VBMI)
        gen = gen_avx512bw_vpermt2varv64qi3;
      break;
    case E_V32HImode:
      if (TARGET_AVX512BW)
        gen = gen_avx512bw_vpermt2varv32hi3;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
        gen = gen_avx512vl_vpermt2varv4si3;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
        gen = gen_avx512vl_vpermt2varv8si3;
      break;
    case E_V16SImode:
      if (TARGET_AVX512F)
        gen = gen_avx512f_vpermt2varv16si3;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
        {
          gen = gen_avx512vl_vpermt2varv4sf3;
          maskmode = V4SImode;
        }
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
        {
          gen = gen_avx512vl_vpermt2varv8sf3;
          maskmode = V8SImode;
        }
      break;
    case E_V16SFmode:
      if (TARGET_AVX512F)
        {
          gen = gen_avx512f_vpermt2varv16sf3;
          maskmode = V16SImode;
        }
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
        gen = gen_avx512vl_vpermt2varv2di3;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
        gen = gen_avx512vl_vpermt2varv4di3;
      break;
    case E_V8DImode:
      if (TARGET_AVX512F)
        gen = gen_avx512f_vpermt2varv8di3;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
        {
          gen = gen_avx512vl_vpermt2varv2df3;
          maskmode = V2DImode;
        }
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
        {
          gen = gen_avx512vl_vpermt2varv4df3;
          maskmode = V4DImode;
        }
      break;
    case E_V8DFmode:
      if (TARGET_AVX512F)
        {
          gen = gen_avx512f_vpermt2varv8df3;
          maskmode = V8DImode;
        }
      break;
    default:
      break;
    }

  if (gen == NULL)
    return false;

  /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
     expander, so args are either in d, or in op0, op1 etc.  */
  if (d)
    {
      rtx vec[64];
      target = d->target;
      op0 = d->op0;
      op1 = d->op1;
      for (int i = 0; i < d->nelt; ++i)
        vec[i] = GEN_INT (d->perm[i]);
      mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
    }

  emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
  return true;
}
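
/* Illustrative sketch, not part of the original sources: element-wise
   semantics of the VPERMT2* instructions selected above, shown for
   32-bit elements.  Bit NELT of each index selects between the two
   source vectors; the bits below it select the element within it.  */
#if 0
static void
vpermt2_ref (unsigned nelt, const unsigned idx[],
             const int op0[], const int op1[], int dst[])
{
  for (unsigned i = 0; i < nelt; i++)
    {
      unsigned j = idx[i] & (2 * nelt - 1);  /* only the low bits matter */
      dst[i] = j < nelt ? op0[j] : op1[j - nelt];
    }
}
#endif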

/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
  machine_mode mode = GET_MODE (op0);
  machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 64);

  if (TARGET_AVX512F && one_operand_shuffle)
    {
      rtx (*gen) (rtx, rtx, rtx) = NULL;
      switch (mode)
        {
        case E_V16SImode:
          gen = gen_avx512f_permvarv16si;
          break;
        case E_V16SFmode:
          gen = gen_avx512f_permvarv16sf;
          break;
        case E_V8DImode:
          gen = gen_avx512f_permvarv8di;
          break;
        case E_V8DFmode:
          gen = gen_avx512f_permvarv8df;
          break;
        default:
          break;
        }
      if (gen != NULL)
        {
          emit_insn (gen (target, op0, mask));
          return;
        }
    }

  if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
    return;

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
        {
          /* Unfortunately, the VPERMQ and VPERMPD instructions only support
             a constant shuffle operand.  With a tiny bit of effort we can
             use VPERMD instead.  A re-interpretation stall for V4DFmode is
             unfortunate but there's no avoiding it.
             Similarly for V16HImode we don't have instructions for variable
             shuffling, while for V32QImode we can use after preparing suitable
             masks vpshufb; vpshufb; vpermq; vpor.  */

          if (mode == V16HImode)
            {
              maskmode = mode = V32QImode;
              w = 32;
              e = 1;
            }
          else
            {
              maskmode = mode = V8SImode;
              w = 8;
              e = 4;
            }
          t1 = gen_reg_rtx (maskmode);

          /* Replicate the low bits of the V4DImode mask into V8SImode:
               mask = { A B C D }
               t1 = { A A B B C C D D }.  */
          for (i = 0; i < w / 2; ++i)
            vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_reg (maskmode, vt);
          mask = gen_lowpart (maskmode, mask);
          if (maskmode == V8SImode)
            emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
          else
            emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

          /* Multiply the shuffle indices by two.  */
          t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
                                    OPTAB_DIRECT);

          /* Add one to the odd shuffle indices:
                t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
          for (i = 0; i < w / 2; ++i)
            {
              vec[i * 2] = const0_rtx;
              vec[i * 2 + 1] = const1_rtx;
            }
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = validize_mem (force_const_mem (maskmode, vt));
          t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
                                    OPTAB_DIRECT);

          /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
          operands[3] = mask = t1;
          target = gen_reg_rtx (mode);
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);
        }

      switch (mode)
        {
        case E_V8SImode:
          /* The VPERMD and VPERMPS instructions already properly ignore
             the high bits of the shuffle elements.  No need for us to
             perform an AND ourselves.  */
          if (one_operand_shuffle)
            {
              emit_insn (gen_avx2_permvarv8si (target, op0, mask));
              if (target != operands[0])
                emit_move_insn (operands[0],
                                gen_lowpart (GET_MODE (operands[0]), target));
            }
          else
            {
              t1 = gen_reg_rtx (V8SImode);
              t2 = gen_reg_rtx (V8SImode);
              emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
              goto merge_two;
            }
          return;

        case E_V8SFmode:
          mask = gen_lowpart (V8SImode, mask);
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
          else
            {
              t1 = gen_reg_rtx (V8SFmode);
              t2 = gen_reg_rtx (V8SFmode);
              emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
              goto merge_two;
            }
          return;

        case E_V4SImode:
          /* By combining the two 128-bit input vectors into one 256-bit
             input vector, we can use VPERMD and VPERMPS for the full
             two-operand shuffle.  */
          t1 = gen_reg_rtx (V8SImode);
          t2 = gen_reg_rtx (V8SImode);
          emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
          return;

        case E_V4SFmode:
          t1 = gen_reg_rtx (V8SFmode);
          t2 = gen_reg_rtx (V8SImode);
          mask = gen_lowpart (V4SImode, mask);
          emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
          return;

        case E_V32QImode:
          t1 = gen_reg_rtx (V32QImode);
          t2 = gen_reg_rtx (V32QImode);
          t3 = gen_reg_rtx (V32QImode);
          vt2 = GEN_INT (-128);
          vt = gen_const_vec_duplicate (V32QImode, vt2);
          vt = force_reg (V32QImode, vt);
          for (i = 0; i < 32; i++)
            vec[i] = i < 16 ? vt2 : const0_rtx;
          vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt2 = force_reg (V32QImode, vt2);
          /* From mask create two adjusted masks, which contain the same
             bits as mask in the low 7 bits of each vector element.
             The first mask will have the most significant bit clear
             if it requests element from the same 128-bit lane
             and MSB set if it requests element from the other 128-bit lane.
             The second mask will have the opposite values of the MSB,
             and additionally will have its 128-bit lanes swapped.
             E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
             t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
             t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
             stands for other 12 bytes.  */
          /* The bit whether element is from the same lane or the other
             lane is bit 4, so shift it up by 3 to the MSB position.  */
          t5 = gen_reg_rtx (V4DImode);
          emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
                                    GEN_INT (3)));
          /* Clear MSB bits from the mask just in case it had them set.  */
          emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
          /* After this t1 will have MSB set for elements from other lane.  */
          emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
          /* Clear bits other than MSB.  */
          emit_insn (gen_andv32qi3 (t1, t1, vt));
          /* Or in the lower bits from mask into t3.  */
          emit_insn (gen_iorv32qi3 (t3, t1, t2));
          /* And invert MSB bits in t1, so MSB is set for elements from the
             same lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt));
          /* Swap 128-bit lanes in t3.  */
          t6 = gen_reg_rtx (V4DImode);
          emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          /* And or in the lower bits from mask into t1.  */
          emit_insn (gen_iorv32qi3 (t1, t1, t2));
          if (one_operand_shuffle)
            {
              /* Each of these shuffles will put 0s in places where
                 element from the other 128-bit lane is needed, otherwise
                 will shuffle in the requested value.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
                                                gen_lowpart (V32QImode, t6)));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
              /* For t3 the 128-bit lanes are swapped again.  */
              t7 = gen_reg_rtx (V4DImode);
              emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              /* And oring both together leads to the result.  */
              emit_insn (gen_iorv32qi3 (target, t1,
                                        gen_lowpart (V32QImode, t7)));
              if (target != operands[0])
                emit_move_insn (operands[0],
                                gen_lowpart (GET_MODE (operands[0]), target));
              return;
            }

          t4 = gen_reg_rtx (V32QImode);
          /* Similarly to the above one_operand_shuffle code,
             just for repeated twice for each operand.  merge_two:
             code will merge the two results together.  */
          emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
                                            gen_lowpart (V32QImode, t6)));
          emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
                                            gen_lowpart (V32QImode, t6)));
          emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
          emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
          t7 = gen_reg_rtx (V4DImode);
          emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          t8 = gen_reg_rtx (V4DImode);
          emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
          emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
          t1 = t4;
          t2 = t3;
          goto merge_two;

        default:
          gcc_assert (GET_MODE_SIZE (mode) <= 16);
          break;
        }
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
         one_operand_shuffle special case, we avoid creating another
         set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  vt = gen_const_vec_duplicate (maskmode, vt);
  mask = expand_simple_binop (maskmode, AND, mask, vt,
                              NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
                                  GEN_INT (exact_log2 (e)),
                                  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
         (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
         (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
         (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      if (TARGET_XOP)
        emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
        emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
         mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);

  if (TARGET_XOP)
    {
      if (GET_MODE (target) != V16QImode)
        target = gen_reg_rtx (V16QImode);
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
      if (target != operands[0])
        emit_move_insn (operands[0],
                        gen_lowpart (GET_MODE (operands[0]), target));
    }
  else if (one_operand_shuffle)
    {
      if (GET_MODE (target) != V16QImode)
        target = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
      if (target != operands[0])
        emit_move_insn (operands[0],
                        gen_lowpart (GET_MODE (operands[0]), target));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
         element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
        {
          /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
             more shuffle to convert the V2DI input mask into a V4SI
             input mask.  At which point the masking that expand_int_vcond
             will work as desired.  */
          rtx t3 = gen_reg_rtx (V4SImode);
          emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
                                        const0_rtx, const0_rtx,
                                        const2_rtx, const2_rtx));
          mask = t3;
          maskmode = V4SImode;
          e = w = 4;
        }

      vt = gen_const_vec_duplicate (maskmode, vt);
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
                                  NULL_RTX, 0, OPTAB_DIRECT);

      if (GET_MODE (target) != mode)
        target = gen_reg_rtx (mode);
      xops[0] = target;
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      if (target != operands[0])
        emit_move_insn (operands[0],
                        gen_lowpart (GET_MODE (operands[0]), target));
    }
}
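
/* Illustrative sketch, not part of the original sources: the index
   arithmetic performed above when a V4DI mask { A B C D } is rewritten
   as a V8SI mask for VPERMD.  Each 64-bit index becomes a pair of
   32-bit indices addressing the two halves of the element.  */
#if 0
static void
widen_perm_mask_ref (const unsigned m4[4], unsigned m8[8])
{
  for (int i = 0; i < 4; i++)
    {
      m8[2 * i] = 2 * m4[i];            /* low 32-bit half  */
      m8[2 * i + 1] = 2 * m4[i] + 1;    /* high 32-bit half */
    }
}
#endif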

/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      machine_mode halfmode = BLKmode;

      switch (imode)
        {
        case E_V64QImode:
          if (unsigned_p)
            unpack = gen_avx512bw_zero_extendv32qiv32hi2;
          else
            unpack = gen_avx512bw_sign_extendv32qiv32hi2;
          halfmode = V32QImode;
          extract
            = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
          break;
        case E_V32QImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv16qiv16hi2;
          else
            unpack = gen_avx2_sign_extendv16qiv16hi2;
          halfmode = V16QImode;
          extract
            = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
          break;
        case E_V32HImode:
          if (unsigned_p)
            unpack = gen_avx512f_zero_extendv16hiv16si2;
          else
            unpack = gen_avx512f_sign_extendv16hiv16si2;
          halfmode = V16HImode;
          extract
            = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
          break;
        case E_V16HImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv8hiv8si2;
          else
            unpack = gen_avx2_sign_extendv8hiv8si2;
          halfmode = V8HImode;
          extract
            = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
          break;
        case E_V16SImode:
          if (unsigned_p)
            unpack = gen_avx512f_zero_extendv8siv8di2;
          else
            unpack = gen_avx512f_sign_extendv8siv8di2;
          halfmode = V8SImode;
          extract
            = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
          break;
        case E_V8SImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv4siv4di2;
          else
            unpack = gen_avx2_sign_extendv4siv4di2;
          halfmode = V4SImode;
          extract
            = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
          break;
        case E_V16QImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv8qiv8hi2;
          else
            unpack = gen_sse4_1_sign_extendv8qiv8hi2;
          break;
        case E_V8HImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv4hiv4si2;
          else
            unpack = gen_sse4_1_sign_extendv4hiv4si2;
          break;
        case E_V4SImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv2siv2di2;
          else
            unpack = gen_sse4_1_sign_extendv2siv2di2;
          break;
        default:
          gcc_unreachable ();
        }

      if (GET_MODE_SIZE (imode) >= 32)
        {
          tmp = gen_reg_rtx (halfmode);
          emit_insn (extract (tmp, src));
        }
      else if (high_p)
        {
          /* Shift higher 8 bytes to lower 8 bytes.  */
          tmp = gen_reg_rtx (V1TImode);
          emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
                                         GEN_INT (64)));
          tmp = gen_lowpart (imode, tmp);
        }
      else
        tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
        {
        case E_V16QImode:
          if (high_p)
            unpack = gen_vec_interleave_highv16qi;
          else
            unpack = gen_vec_interleave_lowv16qi;
          break;
        case E_V8HImode:
          if (high_p)
            unpack = gen_vec_interleave_highv8hi;
          else
            unpack = gen_vec_interleave_lowv8hi;
          break;
        case E_V4SImode:
          if (high_p)
            unpack = gen_vec_interleave_highv4si;
          else
            unpack = gen_vec_interleave_lowv4si;
          break;
        default:
          gcc_unreachable ();
        }

      if (unsigned_p)
        /* We only want zero extension, so interleave with zero.  */
        tmp = force_reg (imode, CONST0_RTX (imode));
      else
        /* Extend from the sign bit: interleave with a per-element
           sign mask.  */
        tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                                   src, pc_rtx, pc_rtx);

      rtx tmp2 = gen_reg_rtx (imode);
      emit_insn (unpack (tmp2, src, tmp));
      emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
    }
}
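
/* Illustrative sketch, not part of the original sources: what the
   pre-SSE4.1 path computes for the low half of a V8HI source.  The
   source is interleaved with either zero (zero extension) or a
   per-element sign mask obtained from the GT comparison above.  */
#if 0
static void
unpack_lo_ref (const short src[8], int dst[4], int unsigned_p)
{
  for (int i = 0; i < 4; i++)
    {
      unsigned lo = (unsigned short) src[i];
      unsigned hi = (!unsigned_p && src[i] < 0) ? 0xffffu : 0;
      dst[i] = (int) (hi << 16 | lo);   /* interleave value with mask */
    }
}
#endif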

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
        {
        case E_QImode:
          insn = gen_subqi3_carry;
          break;
        case E_HImode:
          insn = gen_subhi3_carry;
          break;
        case E_SImode:
          insn = gen_subsi3_carry;
          break;
        case E_DImode:
          insn = gen_subdi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case E_QImode:
          insn = gen_addqi3_carry;
          break;
        case E_HImode:
          insn = gen_addhi3_carry;
          break;
        case E_SImode:
          insn = gen_addsi3_carry;
          break;
        case E_DImode:
          insn = gen_adddi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
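
/* Illustrative sketch, not part of the original sources: the kind of
   source pattern this expander targets.  After "cmp a, b" the carry
   flag holds (a < b) unsigned, so a conditional increment folds into a
   single "adc x, 0" (and a decrement into "sbb x, 0").  */
#if 0
static unsigned
cond_inc_ref (unsigned a, unsigned b, unsigned x)
{
  return x + (a < b);           /* what adc computes after the compare */
}
#endif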

/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    operand = avoid_constant_pool_reference (operand);

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      scalar_int_mode imode = int_mode_for_mode (mode).require ();
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
        {
          int i;

          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              for (i = 0; i < size; i++)
                parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              for (i = 1; i < size; i++)
                parts[i] = adjust_address (operand, SImode, 4 * i);
            }
          else if (CONST_DOUBLE_P (operand))
            {
              const REAL_VALUE_TYPE *r;
              long l[4];

              r = CONST_DOUBLE_REAL_VALUE (operand);
              switch (mode)
                {
                case E_TFmode:
                  real_to_target (l, r, mode);
                  parts[3] = gen_int_mode (l[3], SImode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case E_XFmode:
                  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
                     long double may not be 80-bit.  */
                  real_to_target (l, r, mode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case E_DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          machine_mode upper_mode = mode == XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (CONST_DOUBLE_P (operand))
            {
              long l[4];

              real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);

              /* real_to_target puts 32-bit pieces in each long.  */
              parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
                                       | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
                                          << 32), DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else
                parts[1]
                  = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
                                  | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
                                     << 32), DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
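
/* Illustrative sketch, not part of the original sources: on a 32-bit
   target a DImode quantity is handled as two SImode parts, low word
   first, matching what split_double_mode produces in the simple
   register and offsettable-memory cases above.  */
#if 0
static void
split_di_ref (unsigned long long v, unsigned parts[2])
{
  parts[0] = (unsigned) v;              /* low 32 bits  */
  parts[1] = (unsigned) (v >> 32);      /* high 32 bits */
}
#endif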

/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */
      if (MEM_P (operands[1])
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], word_mode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
          && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
        src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
         automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
        part[1][i] = change_address (part[1][i],
                                     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
        {
          collisionparts[i]
            = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
          if (collisionparts[i])
            collisions++;
        }

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
        {
          std::swap (part[0][1], part[0][2]);
          std::swap (part[1][1], part[1][2]);
        }
      else if (collisions == 1
               && nparts == 4
               && (collisionparts[1] || collisionparts[2]))
        {
          if (collisionparts[1])
            {
              std::swap (part[0][1], part[0][2]);
              std::swap (part[1][1], part[1][2]);
            }
          else
            {
              std::swap (part[0][2], part[0][3]);
              std::swap (part[1][2], part[1][3]);
            }
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base, addr;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          addr = XEXP (part[1][0], 0);
          if (TARGET_TLS_DIRECT_SEG_REFS)
            {
              struct ix86_address parts;
              int ok = ix86_decompose_address (addr, &parts);
              gcc_assert (ok);
              /* It is not valid to use %gs: or %fs: in lea.  */
              gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
            }
          emit_insn (gen_rtx_SET (base, addr));
          part[1][0] = replace_equiv_address (part[1][0], base);
          for (i = 1; i < nparts; i++)
            {
              tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
              part[1][i] = replace_equiv_address (part[1][i], tmp);
            }
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (ix86_gen_add3 (stack_pointer_rtx,
                                          stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
          else if (nparts == 4)
            {
              emit_move_insn (part[0][3], part[1][3]);
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these come from an attempt to avoid REX prefix on
             moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              switch (GET_CODE (part[1][1]))
                {
                case MEM:
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  break;

                case REG:
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  break;

                default:
                  gcc_unreachable ();
                }

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))
           || (nparts == 4
               && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
        {
          operands[2 + i] = part[0][j];
          operands[6 + i] = part[1][j];
        }
    }
  else
    {
      for (i = 0; i < nparts; i++)
        {
          operands[2 + i] = part[0][i];
          operands[6 + i] = part[1][i];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
        if (CONST_INT_P (operands[6 + j])
            && operands[6 + j] != const0_rtx
            && REG_P (operands[2 + j]))
          for (i = j; i < nparts - 1; i++)
            if (CONST_INT_P (operands[7 + i])
                && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
              operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}

/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
          && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
        emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
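
/* Illustrative sketch, not part of the original sources: the add-chain
   alternative used above when COUNT adds are no more expensive than one
   shift-by-immediate, exploiting that x + x == x << 1.  */
#if 0
static unsigned
ashl_by_adds_ref (unsigned x, int count)
{
  while (count-- > 0)
    x += x;                     /* each add shifts left by one bit */
  return x;
}
#endif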
void
ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > half_width)
            ix86_expand_ashl_const (high[0], count - half_width, mode);
        }
      else
        {
          gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          machine_mode half_mode;
          rtx (*gen_lshr3)(rtx, rtx, rtx);
          rtx (*gen_and3)(rtx, rtx, rtx);
          rtx (*gen_xor3)(rtx, rtx, rtx);
          HOST_WIDE_INT bits;
          rtx x;

          if (mode == DImode)
            {
              half_mode = SImode;
              gen_lshr3 = gen_lshrsi3;
              gen_and3 = gen_andsi3;
              gen_xor3 = gen_xorsi3;
              bits = 5;
            }
          else
            {
              half_mode = DImode;
              gen_lshr3 = gen_lshrdi3;
              gen_and3 = gen_anddi3;
              gen_xor3 = gen_xordi3;
              bits = 6;
            }

          if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
            x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
          else
            x = gen_lowpart (half_mode, operands[2]);
          emit_insn (gen_rtx_SET (high[0], x));

          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
          emit_insn (gen_and3 (high[0], high[0], const1_rtx));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
        }

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
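
/* Illustrative sketch, not part of the original sources: the double-word
   left shift on 32-bit halves that the splitter materializes, with shld
   for constant counts below half_width and a move + clear above it.  */
#if 0
static void
dwshl_ref (unsigned *lo, unsigned *hi, int count)       /* 0 <= count < 64 */
{
  if (count >= 32)
    {
      *hi = *lo << (count - 32);        /* high = low, shifted by the rest */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));     /* shld */
      *lo <<= count;
    }
}
#endif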
void
ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));

          if (count > half_width)
            emit_insn (gen_ashr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashr3 (scratch, scratch,
                                GEN_INT (half_width - 1)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

          emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
        }
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > half_width)
            emit_insn (gen_lshr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

          emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
        }
    }
}

/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx_insn *insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
}

/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx_code_label *
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx_code_label *label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}

/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */

static machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}

/* Copy the address to a Pmode register.  This is used for x32 to
   truncate DImode TLS address to a SImode register.  */

static rtx
ix86_copy_addr_to_reg (rtx addr)
{
  rtx reg;
  if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
    {
      reg = copy_addr_to_reg (addr);
      REG_POINTER (reg) = 1;
      return reg;
    }
  else
    {
      gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
      reg = copy_to_mode_reg (DImode, addr);
      REG_POINTER (reg) = 1;
      return gen_rtx_SUBREG (SImode, reg, 0);
    }
}

/* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
   SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is
   COUNT specified in bytes.  When ISSETMEM is TRUE, output the equivalent loop
   to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, machine_mode mode, int unroll,
                               int expected_size, bool issetmem)
{
  rtx_code_label *out_label, *top_label;
  rtx iter, tmp;
  machine_mode iter_mode = counter_mode (count);
  int piece_size_n = GET_MODE_SIZE (mode) * unroll;
  rtx piece_size = GEN_INT (piece_size_n);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);

  /* This assert could be relaxed - in this case we'll need to compute
     smallest power of two, containing in PIECE_SIZE_N and pass it to
     offset_address.  */
  gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
  destmem = offset_address (destmem, tmp, piece_size_n);
  destmem = adjust_address (destmem, mode, 0);

  if (!issetmem)
    {
      srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
      srcmem = adjust_address (srcmem, mode, 0);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using single temporary.
         Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                srcmem =
                  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                destmem =
                  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE
                      - (REG_BR_PROB_BASE + expected_size / 2)
                        / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (!issetmem)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
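
/* Illustrative sketch, not part of the original sources: the shape of
   the loop emitted above.  COUNT is rounded down to whole chunks of
   GET_MODE_SIZE (mode) * unroll bytes; the remainder is left for the
   epilogue code.  */
#if 0
static void
copy_loop_ref (char *dst, const char *src, unsigned long count,
               unsigned long piece)    /* piece = mode size * unroll */
{
  unsigned long size = count & ~(piece - 1);   /* piece is a power of 2 */
  for (unsigned long iter = 0; iter < size; iter += piece)
    __builtin_memcpy (dst + iter, src + iter, piece);
}
#endif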

/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
   When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
   When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
   For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
   ORIG_VALUE is the original value passed to memset to fill the memory with.
   Other arguments have same meaning as for previous function.  */

static void
expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
                              rtx destptr, rtx srcptr, rtx value,
                              rtx orig_value, rtx count,
                              machine_mode mode, bool issetmem)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If possible, it is shorter to use rep movs.
     TODO: Maybe it is better to move this logic to decide_alg.  */
  if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
      && (!issetmem || orig_value == const0_rtx))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);

  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
    {
      rounded_count
        = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);

  if (issetmem)
    {
      value = force_reg (mode, gen_lowpart (mode, value));
      emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
    }
  else
    {
      if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
        srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
      if (mode != QImode)
        {
          srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                                   GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
          srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
        }
      else
        srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
      if (CONST_INT_P (count))
        {
          rounded_count
            = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
          srcmem = shallow_copy_rtx (srcmem);
          set_mem_size (srcmem, rounded_count);
        }
      else if (MEM_SIZE_KNOWN_P (srcmem))
        clear_mem_size (srcmem);
      emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                              destexp, srcexp));
    }
}
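
/* Illustrative sketch, not part of the original sources: "rep stos"
   stores COUNTREG copies of the element and leaves the destination
   pointer just past the filled area, which is why DESTEXP above is
   built as destptr + (countreg << log2 (element size)).  */
#if 0
static unsigned *
rep_stosd_ref (unsigned *dst, unsigned val, unsigned long count)
{
  while (count--)
    *dst++ = val;
  return dst;          /* the final pointer value, as in DESTEXP */
}
#endif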

/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRC is passed by pointer to be updated on return.
   Return value is updated DST.  */
static rtx
emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
             HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, src = *srcmem, adjust, tempreg;
  enum insn_code code;
  machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  piece_size = 1 << floor_log2 (size_to_move);
  while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
         || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
    {
      gcc_assert (piece_size > 1);
      piece_size >>= 1;
    }

  /* Find the corresponding vector mode with the same size as MOVE_MODE.
     MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
  if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
    {
      int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
      if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
          || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
        {
          move_mode = word_mode;
          piece_size = GET_MODE_SIZE (move_mode);
          code = optab_handler (mov_optab, move_mode);
        }
    }
  gcc_assert (code != CODE_FOR_nothing);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
  src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZES moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      /* We move from memory to memory, so we'll need to do it via
         a temporary register.  */
      tempreg = gen_reg_rtx (move_mode);
      emit_insn (GEN_FCN (code) (tempreg, src));
      emit_insn (GEN_FCN (code) (dst, tempreg));

      emit_move_insn (destptr,
                      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
      emit_move_insn (srcptr,
                      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
                                          piece_size);
      src = adjust_automodify_address_nv (src, move_mode, srcptr,
                                          piece_size);
    }

  /* Update DST and SRC rtx.  */
  *srcmem = src;
  return dst;
}
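
/* Illustrative sketch, not part of the original sources: the starting
   piece size is the largest power of two not exceeding SIZE_TO_MOVE
   (i.e. 1 << floor_log2) and is halved until the target supports a move
   of that width; the final piece size must divide SIZE_TO_MOVE evenly,
   as the assert above requires.  */
#if 0
static int
floor_pow2_ref (int n)
{
  int p = 1;
  while (p * 2 <= n)            /* equivalent to 1 << floor_log2 (n) */
    p *= 2;
  return p;
}
#endif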

/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
         relaxed, but it'll require a bit more complicated epilogue
         expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
        {
          if (epilogue_size & i)
            destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
                                   GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4, false);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}

/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
   with value PROMOTED_VAL.
   Return value is updated DST.  */
static rtx
emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
             HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, adjust;
  enum insn_code code;
  machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  move_mode = GET_MODE (promoted_val);
  if (move_mode == VOIDmode)
    move_mode = QImode;
  if (size_to_move < GET_MODE_SIZE (move_mode))
    {
      unsigned int move_bits = size_to_move * BITS_PER_UNIT;
      move_mode = int_mode_for_size (move_bits, 0).require ();
      promoted_val = gen_lowpart (move_mode, promoted_val);
    }
  piece_size = GET_MODE_SIZE (move_mode);
  code = optab_handler (mov_optab, move_mode);
  gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZES moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      if (piece_size <= GET_MODE_SIZE (word_mode))
        {
          emit_insn (gen_strset (destptr, dst, promoted_val));
          dst = adjust_automodify_address_nv (dst, move_mode, destptr,
                                              piece_size);
          continue;
        }

      emit_insn (GEN_FCN (code) (dst, promoted_val));

      emit_move_insn (destptr,
                      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
                                          piece_size);
    }

  /* Update DST rtx.  */
  return dst;
}

/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count = expand_simple_binop (counter_mode (count), AND, count,
                               GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2, true);
}
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
                        rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
         relaxed, but it'll require a bit more complicated epilogue
         expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
        {
          if (epilogue_size & i)
            {
              if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
                destmem = emit_memset (destmem, destptr, vec_value, i);
              else
                destmem = emit_memset (destmem, destptr, value, i);
            }
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
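
/* For example, with MAX_SIZE == 8 the jump tree above tests bits 4, 2 and 1
   of COUNT in turn, each ix86_expand_aligntest branching around its store
   when the bit is clear; a residual count of 7 (binary 111) therefore
   performs exactly one SImode, one HImode and one QImode store, and no
   byte is ever stored twice.  */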
/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
   DESTMEM to align it to DESIRED_ALIGNMENT.  Original alignment is ALIGN.
   Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
   used.
   Return value is updated DESTMEM.  */
static rtx
expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx vec_value, rtx count, int align,
                               int desired_alignment, bool issetmem)
{
  int i;
  for (i = 1; i < desired_alignment; i <<= 1)
    {
      if (align <= i)
        {
          rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
          if (issetmem)
            {
              if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
                destmem = emit_memset (destmem, destptr, vec_value, i);
              else
                destmem = emit_memset (destmem, destptr, value, i);
            }
          else
            destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
          ix86_adjust_counter (count, i);
          emit_label (label);
          LABEL_NUSES (label) = 1;
          set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
        }
    }
  return destmem;
}
/* Test if COUNT & SIZE is nonzero and if so, expand movmem
   or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
   and jump to DONE_LABEL.  */
static void
expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr,
                               rtx value, rtx vec_value,
                               rtx count, int size,
                               rtx done_label, bool issetmem)
{
  rtx_code_label *label = ix86_expand_aligntest (count, size, false);
  machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
  rtx modesize;
  int n;

  /* If we do not have vector value to copy, we must reduce size.  */
  if (issetmem)
    {
      if (!vec_value)
        {
          if (GET_MODE (value) == VOIDmode && size > 8)
            mode = Pmode;
          else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
            mode = GET_MODE (value);
        }
      else
        mode = GET_MODE (vec_value), value = vec_value;
    }
  else
    {
      /* Choose appropriate vector mode.  */
      if (size >= 32)
        mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
      else if (size >= 16)
        mode = TARGET_SSE ? V16QImode : DImode;
      srcmem = change_address (srcmem, mode, srcptr);
    }
  destmem = change_address (destmem, mode, destptr);
  modesize = GEN_INT (GET_MODE_SIZE (mode));
  gcc_assert (GET_MODE_SIZE (mode) <= size);
  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
    {
      if (issetmem)
        emit_move_insn (destmem, gen_lowpart (mode, value));
      else
        {
          emit_move_insn (destmem, srcmem);
          srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
        }
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
    }

  destmem = offset_address (destmem, count, 1);
  destmem = offset_address (destmem, GEN_INT (-2 * size),
                            GET_MODE_SIZE (mode));
  if (!issetmem)
    {
      srcmem = offset_address (srcmem, count, 1);
      srcmem = offset_address (srcmem, GEN_INT (-2 * size),
                               GET_MODE_SIZE (mode));
    }
  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
    {
      if (issetmem)
        emit_move_insn (destmem, gen_lowpart (mode, value));
      else
        {
          emit_move_insn (destmem, srcmem);
          srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
        }
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
    }
  emit_jump_insn (gen_jump (done_label));
  emit_barrier ();

  emit_label (label);
  LABEL_NUSES (label) = 1;
}
/* Handle small memcpy (up to SIZE that is supposed to be small power of 2)
   and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
   bytes and last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
   proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
   DONE_LABEL is a label after the whole copying sequence.  The label is created
   on demand if *DONE_LABEL is NULL.
   MIN_SIZE is minimal size of block copied.  This value gets adjusted for new
   bounds after the initial copies.

   DESTMEM/SRCMEM are memory expressions pointing to the copied block,
   DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
   we will dispatch to a library call for large blocks.

   In pseudocode we do:

   if (COUNT < SIZE)
     {
       Assume that SIZE is 4.  Bigger sizes are handled analogously
       if (COUNT & 4)
         {
           copy 4 bytes from SRCPTR to DESTPTR
           copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
           goto done_label
         }
       if (!COUNT)
         goto done_label;
       copy 1 byte from SRCPTR to DESTPTR
       if (COUNT & 2)
         {
           copy 2 bytes from SRCPTR to DESTPTR
           copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
         }
     }
   else
     {
       copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
       copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE

       OLD_DESTPTR = DESTPTR;
       Align DESTPTR up to DESIRED_ALIGN
       SRCPTR += DESTPTR - OLD_DESTPTR
       COUNT -= DESTPTR - OLD_DESTPTR
       Round COUNT down to multiple of SIZE
       << optional caller supplied zero size guard is here >>
       << optional caller supplied dynamic check is here >>
       << caller supplied main copy loop is here >>
     }
   done_label:
  */
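
/* Worked example of the pseudocode above, with SIZE == 4 and a runtime
   COUNT of 11: the block is >= SIZE, so 4 bytes are copied from the start
   and 4 more from offset 7 (COUNT - SIZE); after DESTPTR is rounded up to
   DESIRED_ALIGN and COUNT rounded down to a multiple of 4, the main loop
   copies the aligned middle, and any bytes it would miss were already
   covered by the two possibly overlapping edge copies.  */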
static void
expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
                                                            rtx *destptr, rtx *srcptr,
                                                            machine_mode mode,
                                                            rtx value, rtx vec_value,
                                                            rtx *count,
                                                            rtx_code_label **done_label,
                                                            int size, int desired_align,
                                                            int align,
                                                            unsigned HOST_WIDE_INT *min_size,
                                                            bool dynamic_check,
                                                            bool issetmem)
{
  rtx_code_label *loop_label = NULL, *label;
  int n;
  rtx modesize;
  int prolog_size = 0;
  rtx mode_value;

  /* Choose proper value to copy.  */
  if (issetmem && VECTOR_MODE_P (mode))
    mode_value = vec_value;
  else
    mode_value = value;
  gcc_assert (GET_MODE_SIZE (mode) <= size);

  /* See if block is big or small, handle small blocks.  */
  if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
    {
      int size2 = size;
      loop_label = gen_label_rtx ();

      if (!*done_label)
        *done_label = gen_label_rtx ();

      emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
                               1, loop_label);
      size2 >>= 1;

      /* Handle sizes > 3.  */
      for (; size2 > 2; size2 >>= 1)
        expand_small_movmem_or_setmem (destmem, srcmem,
                                       *destptr, *srcptr,
                                       value, vec_value,
                                       *count,
                                       size2, *done_label, issetmem);
      /* Nothing to copy?  Jump to DONE_LABEL if so.  */
      emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
                               1, *done_label);

      /* Do a byte copy.  */
      destmem = change_address (destmem, QImode, *destptr);
      if (issetmem)
        emit_move_insn (destmem, gen_lowpart (QImode, value));
      else
        {
          srcmem = change_address (srcmem, QImode, *srcptr);
          emit_move_insn (destmem, srcmem);
        }

      /* Handle sizes 2 and 3.  */
      label = ix86_expand_aligntest (*count, 2, false);
      destmem = change_address (destmem, HImode, *destptr);
      destmem = offset_address (destmem, *count, 1);
      destmem = offset_address (destmem, GEN_INT (-2), 2);
      if (issetmem)
        emit_move_insn (destmem, gen_lowpart (HImode, value));
      else
        {
          srcmem = change_address (srcmem, HImode, *srcptr);
          srcmem = offset_address (srcmem, *count, 1);
          srcmem = offset_address (srcmem, GEN_INT (-2), 2);
          emit_move_insn (destmem, srcmem);
        }

      emit_label (label);
      LABEL_NUSES (label) = 1;
      emit_jump_insn (gen_jump (*done_label));
      emit_barrier ();
    }
  else
    gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
                || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);

  /* Start memcpy for COUNT >= SIZE.  */
  if (loop_label)
    {
      emit_label (loop_label);
      LABEL_NUSES (loop_label) = 1;
    }

  /* Copy first desired_align bytes.  */
  if (!issetmem)
    srcmem = change_address (srcmem, mode, *srcptr);
  destmem = change_address (destmem, mode, *destptr);
  modesize = GEN_INT (GET_MODE_SIZE (mode));
  for (n = 0; prolog_size < desired_align - align; n++)
    {
      if (issetmem)
        emit_move_insn (destmem, mode_value);
      else
        {
          emit_move_insn (destmem, srcmem);
          srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
        }
      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
      prolog_size += GET_MODE_SIZE (mode);
    }

  /* Copy last SIZE bytes.  */
  destmem = offset_address (destmem, *count, 1);
  destmem = offset_address (destmem,
                            GEN_INT (-size - prolog_size),
                            1);
  if (issetmem)
    emit_move_insn (destmem, mode_value);
  else
    {
      srcmem = offset_address (srcmem, *count, 1);
      srcmem = offset_address (srcmem,
                               GEN_INT (-size - prolog_size),
                               1);
      emit_move_insn (destmem, srcmem);
    }
  for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
    {
      destmem = offset_address (destmem, modesize, 1);
      if (issetmem)
        emit_move_insn (destmem, mode_value);
      else
        {
          srcmem = offset_address (srcmem, modesize, 1);
          emit_move_insn (destmem, srcmem);
        }
    }

  /* Align destination.  */
  if (desired_align > 1 && desired_align > align)
    {
      rtx saveddest = *destptr;

      gcc_assert (desired_align <= size);
      /* Align destptr up, place it to new register.  */
      *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
                                      GEN_INT (prolog_size),
                                      NULL_RTX, 1, OPTAB_DIRECT);
      if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
        REG_POINTER (*destptr) = 1;
      *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
                                      GEN_INT (-desired_align),
                                      *destptr, 1, OPTAB_DIRECT);
      /* See how many bytes we skipped.  */
      saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
                                       *destptr,
                                       saveddest, 1, OPTAB_DIRECT);
      /* Adjust srcptr and count.  */
      if (!issetmem)
        *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
                                       saveddest, *srcptr, 1, OPTAB_DIRECT);
      *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
                                    saveddest, *count, 1, OPTAB_DIRECT);
      /* We copied at most size + prolog_size.  */
      if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
        *min_size
          = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
      else
        *min_size = 0;

      /* Our loops always round down the block size, but for dispatch to
         library we need precise value.  */
      if (dynamic_check)
        *count = expand_simple_binop (GET_MODE (*count), AND, *count,
                                      GEN_INT (-size), *count, 1, OPTAB_DIRECT);
    }
  else
    {
      gcc_assert (prolog_size == 0);
      /* Decrease count, so we won't end up copying last word twice.  */
      if (!CONST_INT_P (*count))
        *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
                                      constm1_rtx, *count, 1, OPTAB_DIRECT);
      else
        *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
                                      (unsigned HOST_WIDE_INT)size));
      if (*min_size)
        *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
    }
}
/* This function is like the previous one, except here we know how many bytes
   need to be copied.  That allows us to update alignment not only of DST, which
   is returned, but also of SRC, which is passed as a pointer for that
   reason.  */
static rtx
expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
                                        rtx srcreg, rtx value, rtx vec_value,
                                        int desired_align, int align_bytes,
                                        bool issetmem)
{
  rtx src = NULL;
  rtx orig_dst = dst;
  rtx orig_src = NULL;
  int piece_size = 1;
  int copied_bytes = 0;

  if (!issetmem)
    {
      gcc_assert (srcp != NULL);
      src = *srcp;
      orig_src = src;
    }

  for (piece_size = 1;
       piece_size <= desired_align && copied_bytes < align_bytes;
       piece_size <<= 1)
    {
      if (align_bytes & piece_size)
        {
          if (issetmem)
            {
              if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
                dst = emit_memset (dst, destreg, vec_value, piece_size);
              else
                dst = emit_memset (dst, destreg, value, piece_size);
            }
          else
            dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
          copied_bytes += piece_size;
        }
    }
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);

  if (!issetmem)
    {
      int src_align_bytes = get_mem_align_offset (src, desired_align
                                                       * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        src_align_bytes = desired_align - src_align_bytes;
      if (src_align_bytes >= 0)
        {
          unsigned int src_align;
          for (src_align = desired_align; src_align >= 2; src_align >>= 1)
            {
              if ((src_align_bytes & (src_align - 1))
                  == (align_bytes & (src_align - 1)))
                break;
            }
          if (src_align > (unsigned int) desired_align)
            src_align = desired_align;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      if (MEM_SIZE_KNOWN_P (orig_src))
        set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
      *srcp = src;
    }

  return dst;
}
/* Return true if ALG can be used in current context.
   Assume we expand memset if MEMSET is true.  */
static bool
alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
{
  if (alg == no_stringop)
    return false;
  if (alg == vector_loop)
    return TARGET_SSE || TARGET_AVX;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes, or if we have a non-default
     address space, since some string insns cannot override the segment.  */
  if (alg == rep_prefix_1_byte
      || alg == rep_prefix_4_byte
      || alg == rep_prefix_8_byte)
    {
      if (have_as)
        return false;
      if (fixed_regs[CX_REG]
          || fixed_regs[DI_REG]
          || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
        return false;
    }
  return true;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
            unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
            bool memset, bool zero_memset, bool have_as,
            int *dynamic_check, bool *noalign, bool recur)
{
  const struct stringop_algs *algs;
  bool optimize_for_speed;
  int max = 0;
  const struct processor_costs *cost;
  int i;
  bool any_alg_usable_p = false;

  *noalign = false;
  *dynamic_check = -1;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && (max_size < 256
              || (expected_size != -1 && expected_size < 256))))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];

  /* See maximal size for user defined algorithm.  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    {
      enum stringop_alg candidate = algs->size[i].alg;
      bool usable = alg_usable_p (candidate, memset, have_as);
      any_alg_usable_p |= usable;

      if (candidate != libcall && candidate && usable)
        max = algs->size[i].max;
    }

  /* If expected size is not known but max size is small enough
     so inline version is a win, set expected size into
     the range.  */
  if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
      && expected_size == -1)
    expected_size = min_size / 2 + max_size / 2;

  /* If user specified the algorithm, honor it if possible.  */
  if (ix86_stringop_alg != no_stringop
      && alg_usable_p (ix86_stringop_alg, memset, have_as))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      *noalign = true;
      if (!count || (count & 3) || (memset && !zero_memset))
        return alg_usable_p (rep_prefix_1_byte, memset, have_as)
               ? rep_prefix_1_byte : loop_1_byte;
      else
        return alg_usable_p (rep_prefix_4_byte, memset, have_as)
               ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      enum stringop_alg alg = libcall;
      bool alg_noalign = false;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall
                  && alg_usable_p (candidate, memset, have_as))
                {
                  alg = candidate;
                  alg_noalign = algs->size[i].noalign;
                }
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    {
                      *noalign = alg_noalign;
                      return alg;
                    }
                  else if (!any_alg_usable_p)
                    break;
                }
              else if (alg_usable_p (candidate, memset, have_as))
                {
                  *noalign = algs->size[i].noalign;
                  return candidate;
                }
            }
        }
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
          || !alg_usable_p (algs->unknown_size, memset, have_as)))
    {
      enum stringop_alg alg;
      HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;

      /* If there aren't any usable algorithms or if recursing already,
         then recursing on smaller sizes or same size isn't going to
         find anything.  Just return the simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p || recur)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
                        zero_memset, have_as, dynamic_check, noalign, true);
      gcc_assert (*dynamic_check == -1);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      else
        gcc_assert (alg != libcall);
      return alg;
    }
  return (alg_usable_p (algs->unknown_size, memset, have_as)
          ? algs->unknown_size : libcall);
}
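
/* As an illustrative sketch of the table walk above, a tuning entry of the
   shape

     static stringop_algs example_memcpy = {
       libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
                 {-1, libcall, false}}};

   makes decide_alg pick `loop' for expected sizes up to 32 bytes,
   `rep_prefix_4_byte' up to 8192, and fall back to a library call beyond
   that; a max of -1 in the last slot marks the unbounded range.  (The
   numbers are only an example of the table shape, not a quote of any
   particular processor's cost table.)  */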
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size,
                  machine_mode move_mode)
{
  int desired_align = 0;

  gcc_assert (alg != no_stringop);

  if (alg == libcall)
    return 0;
  if (move_mode == VOIDmode)
    return 0;

  desired_align = GET_MODE_SIZE (move_mode);
  /* PentiumPro has special logic triggering for 8 byte aligned blocks,
     copying whole cacheline at once.  */
  if (TARGET_PENTIUMPRO
      && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
    desired_align = 8;

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;

  return desired_align;
}
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (machine_mode mode, rtx val)
{
  machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, CONST0_RTX (mode));
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        if (mode == SImode)
          emit_insn (gen_insvsi_1 (reg, reg));
        else
          emit_insn (gen_insvdi_1 (reg, reg));
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg =
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
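
/* Worked example: for VAL == 0xAB and MODE == SImode the constant path
   computes 0xAB * 0x01010101 == 0xABABABAB directly, while the shift/or
   path builds the same value as

     reg = 0x000000AB;
     reg |= reg << 8;    /+ now 0x0000ABAB +/
     reg |= reg << 16;   /+ now 0xABABABAB +/

   with one more << 32 / IOR step appended for DImode.  */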
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string move (memcpy) or store (memset) operation.  Use i386 string
   operations when profitable.  The code depends upon architecture, block size
   and alignment, but always has one of the following overall structures:

   Aligned move sequence:

     1) Prologue guard: Conditional that jumps up to epilogues for small
        blocks that can be handled by epilogue alone.  This is faster
        but also needed for correctness, since prologue assumes the block
        is larger than the desired alignment.

        Optional dynamic check for size and libcall for large
        blocks is emitted here too, with -minline-stringops-dynamically.

     2) Prologue: copy first few bytes in order to get destination
        aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
        than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
        copied.  We emit either a jump tree on power of two sized
        blocks, or a byte loop.

     3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
        with specified algorithm.

     4) Epilogue: code copying tail of the block that is too small to be
        handled by main body (or up to size guarded by prologue guard).

   Misaligned move sequence:

     1) misaligned move prologue/epilogue containing:
        a) Prologue handling small memory blocks and jumping to done_label
           (skipped if blocks are known to be large enough)
        b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
           needed by single possibly misaligned move
           (skipped if alignment is not needed)
        c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves

     2) Zero size guard dispatching to done_label, if needed

     3) dispatch to library call, if needed,

     4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
        with specified algorithm.  */
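
/* For instance, an illustrative memset trace (assumed numbers, not tied to
   any particular tuning): with a word-mode unrolled loop on x86-64,
   SIZE_NEEDED is 4 * 8 == 32 bytes.  Step 1 branches straight to the
   epilogue when the runtime count is below 32; step 2 stores up to
   DESIRED_ALIGN - ALIGN leading bytes to align the destination; step 3 runs
   the unrolled loop over the aligned middle; and step 4 stores the
   remaining tail, COUNT modulo the chunk size.  */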
bool
ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
                           rtx align_exp, rtx expected_align_exp,
                           rtx expected_size_exp, rtx min_size_exp,
                           rtx max_size_exp, rtx probable_max_size_exp,
                           bool issetmem)
{
  rtx destreg;
  rtx srcreg = NULL;
  rtx_code_label *label = NULL;
  rtx tmp;
  rtx_code_label *jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  rtx vec_promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;
  machine_mode move_mode = VOIDmode;
  machine_mode wider_mode;
  int unroll_factor = 1;
  /* TODO: Once value ranges are available, fill in proper data.  */
  unsigned HOST_WIDE_INT min_size = 0;
  unsigned HOST_WIDE_INT max_size = -1;
  unsigned HOST_WIDE_INT probable_max_size = -1;
  bool misaligned_prologue_used = false;
  bool have_as;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_P (dst)
           && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    {
      min_size = max_size = probable_max_size = count = expected_size
        = INTVAL (count_exp);
      /* When COUNT is 0, there is nothing to do.  */
      if (!count)
        return true;
    }
  else
    {
      if (min_size_exp)
        min_size = INTVAL (min_size_exp);
      if (max_size_exp)
        max_size = INTVAL (max_size_exp);
      if (probable_max_size_exp)
        probable_max_size = INTVAL (probable_max_size_exp);
      if (CONST_INT_P (expected_size_exp))
        expected_size = INTVAL (expected_size_exp);
    }

  /* Make sure we don't need to care about overflow later on.  */
  if (count > (HOST_WIDE_INT_1U << 30))
    return false;

  have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
  if (!issetmem)
    have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */
  alg = decide_alg (count, expected_size, min_size, probable_max_size,
                    issetmem,
                    issetmem && val_exp == const0_rtx, have_as,
                    &dynamic_check, &noalign, false);
  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);

  /* For now vector-version of memset is generated only for memory zeroing, as
     creating of promoted vector value is very cheap in this case.  */
  if (issetmem && alg == vector_loop && val_exp != const0_rtx)
    alg = unrolled_loop;

  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
  if (!issetmem)
    srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));

  unroll_factor = 1;
  move_mode = word_mode;
  switch (alg)
    {
    case libcall:
    case no_stringop:
    case last_alg:
      gcc_unreachable ();
    case loop_1_byte:
      need_zero_guard = true;
      move_mode = QImode;
      break;
    case loop:
      need_zero_guard = true;
      break;
    case unrolled_loop:
      need_zero_guard = true;
      unroll_factor = (TARGET_64BIT ? 4 : 2);
      break;
    case vector_loop:
      need_zero_guard = true;
      unroll_factor = 4;
      /* Find the widest supported mode.  */
      move_mode = word_mode;
      while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
             && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
        move_mode = wider_mode;

      if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
        move_mode = TImode;

      /* Find the corresponding vector mode with the same size as MOVE_MODE.
         MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
      if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
        {
          int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
          if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
              || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
            move_mode = word_mode;
        }
      gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
      break;
    case rep_prefix_8_byte:
      move_mode = DImode;
      break;
    case rep_prefix_4_byte:
      move_mode = SImode;
      break;
    case rep_prefix_1_byte:
      move_mode = QImode;
      break;
    }
  size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
  epilogue_size_needed = size_needed;

  /* If we are going to call any library calls conditionally, make sure any
     pending stack adjustment happen before the first conditional branch,
     otherwise they will be emitted before the library call only and won't
     happen from the other branches.  */
  if (dynamic_check != -1)
    do_pending_stack_adjust ();

  desired_align = decide_alignment (align, alg, expected_size, move_mode);
  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Misaligned move sequences handle both prologue and epilogue at once.
     Default code generation results in a smaller code for large alignments
     and also avoids redundant job when sizes are known precisely.  */
  misaligned_prologue_used
    = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
       && MAX (desired_align, epilogue_size_needed) <= 32
       && desired_align <= epilogue_size_needed
       && ((desired_align > align && !align_bytes)
           || (!count && epilogue_size_needed > 1)));

  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in the
     front of all code).
     For now the misaligned move sequences do not have fast path
     without broadcasting.  */
  if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
    {
      if (alg == vector_loop)
        {
          gcc_assert (val_exp == const0_rtx);
          vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
          promoted_val = promote_duplicated_reg_to_size (val_exp,
                                                         GET_MODE_SIZE (word_mode),
                                                         desired_align, align);
        }
      else
        promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                       desired_align, align);
    }

  /* Misaligned move sequences handle both prologues and epilogues at once.
     Default code generation results in smaller code for large alignments and
     also avoids redundant job when sizes are known precisely.  */
  if (misaligned_prologue_used)
    {
      /* Misaligned move prologue handles small blocks by itself.  */
      expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
           (dst, src, &destreg, &srcreg,
            move_mode, promoted_val, vec_promoted_val,
            &count_exp,
            &jump_around_label,
            desired_align < align
            ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
            desired_align, align, &min_size, dynamic_check, issetmem);
      if (!issetmem)
        src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
      set_mem_align (dst, desired_align * BITS_PER_UNIT);
      epilogue_size_needed = 0;
      if (need_zero_guard
          && min_size < (unsigned HOST_WIDE_INT) size_needed)
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (jump_around_label == NULL_RTX)
            jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1,
                                   jump_around_label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  /* Ensure that alignment prologue won't copy past end of block.  */
  else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);

      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use byte
         loop variant.  */
      if (issetmem && epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
          || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
        {
          /* If main algorithm works on QImode, no epilogue is needed.
             For small sizes just don't align anything.  */
          if (size_needed == 1)
            desired_align = align;
          else
            goto epilogue;
        }
      else if (!count
               && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (!issetmem && CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
            {
              emit_block_copy_via_libcall (dst, src, count_exp);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx_code_label *hot_label = gen_label_rtx ();
          if (jump_around_label == NULL_RTX)
            jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, counter_mode (count_exp),
                                   1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          if (issetmem)
            set_storage_via_libcall (dst, count_exp, val_exp);
          else
            emit_block_copy_via_libcall (dst, src, count_exp);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }

  /* Step 2: Alignment prologue.  */
  /* Do the expensive promotion once we branched off the small blocks.  */
  if (issetmem && !promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);

  if (desired_align > align && !misaligned_prologue_used)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in prologue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          if (!issetmem)
            src = change_address (src, BLKmode, srcreg);
          dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
                                               promoted_val, vec_promoted_val,
                                               count_exp, align, desired_align,
                                               issetmem);
          /* At most desired_align - align bytes are copied.  */
          if (min_size < (unsigned)(desired_align - align))
            min_size = 0;
          else
            min_size -= desired_align - align;
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
                                                        srcreg,
                                                        promoted_val,
                                                        vec_promoted_val,
                                                        desired_align,
                                                        align_bytes,
                                                        issetmem);

          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
          min_size -= align_bytes;
          max_size -= align_bytes;
        }
      if (need_zero_guard
          && min_size < (unsigned HOST_WIDE_INT) size_needed
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
      if (issetmem)
        promoted_val = val_exp;
    }
  else if (label == NULL_RTX && !misaligned_prologue_used)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
    case last_alg:
      gcc_unreachable ();
    case loop_1_byte:
    case loop:
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
                                     count_exp, move_mode, unroll_factor,
                                     expected_size, issetmem);
      break;
    case vector_loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
                                     vec_promoted_val, count_exp, move_mode,
                                     unroll_factor, expected_size, issetmem);
      break;
    case rep_prefix_8_byte:
    case rep_prefix_4_byte:
    case rep_prefix_1_byte:
      expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
                                    val_exp, count_exp, move_mode, issetmem);
      break;
    }

  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      if (!issetmem)
        src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                            (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      if (!issetmem)
        src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
        {
          tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                     GEN_INT (size_needed - 1), count_exp, 1,
                                     OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        {
          if (issetmem)
            expand_setmem_epilogue (dst, destreg, promoted_val,
                                    vec_promoted_val, count_exp,
                                    epilogue_size_needed);
          else
            expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                                    epilogue_size_needed);
        }
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
     not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx_code_label *align_2_label = NULL;
  rtx_code_label *align_3_label = NULL;
  rtx_code_label *align_4_label = gen_label_rtx ();
  rtx_code_label *end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if it is aligned to 4-byte.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
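
  /* Worked example of the zero-byte test: for SCRATCH == 0x41004242,
     TMPREG = SCRATCH - 0x01010101 == 0x3FFF4141 and ~SCRATCH == 0xBEFFBDBD;
     their AND masked with 0x80808080 leaves 0x00800000 set, the high bit of
     exactly the byte that was zero, whereas a word with no zero byte leaves
     all four high bits clear and the loop above keeps iterating.  */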
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg, tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2, out)));
    }
  else
    {
      rtx_code_label *end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
/* Expand strlen.  */
bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo (strlen (bar), strlen (bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;
      /* Can't use this for non-default address spaces.  */
      if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  rtx vec[3];
  rtx use = NULL, call;
  unsigned int vec_len = 0;
  tree fndecl;

  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    {
      fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
      if (fndecl
          && (lookup_attribute ("interrupt",
                                TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
        error ("interrupt service routine can't be called directly");
    }
  else
    fndecl = NULL_TREE;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  Also,
         check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
         it an indirect call.  */
      rtx addr = XEXP (fnaddr, 0);
      if (flag_pic
          && GET_CODE (addr) == SYMBOL_REF
          && !SYMBOL_REF_LOCAL_P (addr))
        {
          if (flag_plt
              && (SYMBOL_REF_DECL (addr) == NULL_TREE
                  || !lookup_attribute ("noplt",
                                        DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
            {
              if (!TARGET_64BIT
                  || (ix86_cmodel == CM_LARGE_PIC
                      && DEFAULT_ABI != MS_ABI))
                {
                  use_reg (&use, gen_rtx_REG (Pmode,
                                              REAL_PIC_OFFSET_TABLE_REGNUM));
                  if (ix86_use_pseudo_pic_reg ())
                    emit_move_insn (gen_rtx_REG (Pmode,
                                                 REAL_PIC_OFFSET_TABLE_REGNUM),
                                    pic_offset_table_rtx);
                }
            }
          else if (!TARGET_PECOFF && !TARGET_MACHO)
            {
              if (TARGET_64BIT)
                {
                  fnaddr = gen_rtx_UNSPEC (Pmode,
                                           gen_rtvec (1, addr),
                                           UNSPEC_GOTPCREL);
                  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
                }
              else
                {
                  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
                                           UNSPEC_GOT);
                  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
                  fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                                         fnaddr);
                }
              fnaddr = gen_const_mem (Pmode, fnaddr);
              /* Pmode may not be the same as word_mode for x32, which
                 doesn't support indirect branch via 32-bit memory slot.
                 Since x32 GOT slot is 64 bit with zero upper 32 bits,
                 indirect branch via x32 GOT slot is OK.  */
              if (GET_MODE (fnaddr) != word_mode)
                fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
              fnaddr = gen_rtx_MEM (QImode, fnaddr);
            }
        }
    }

  /* Skip setting up RAX register for -mskip-rax-setup when there are no
     parameters passed in vector registers.  */
  if (TARGET_64BIT && ix86_abi == SYSV_ABI
      && (INTVAL (callarg2) > 0
          || (INTVAL (callarg2) == 0
              && (TARGET_SSE || !flag_skip_rax_setup))))
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && !TARGET_PECOFF
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
     branch via x32 GOT slot is OK.  */
  else if (!(TARGET_X32
             && MEM_P (fnaddr)
             && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
             && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
           && (sibcall
               ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
               : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
    {
      fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);

  if (retval)
    {
      /* We should add bounds as destination register in case
         pointer with bounds may be returned.  */
      if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
        {
          rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
          rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
          if (GET_CODE (retval) == PARALLEL)
            {
              b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
              b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
              rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
              retval = chkp_join_splitted_slot (retval, par);
            }
          else
            {
              retval = gen_rtx_PARALLEL (VOIDmode,
                                         gen_rtvec (3, retval, b0, b1));
              chkp_put_regs_to_expr_list (retval);
            }
        }

      call = gen_rtx_SET (retval, call);
    }
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (cfun->machine->no_caller_saved_registers
      && (!fndecl
          || (!TREE_THIS_VOLATILE (fndecl)
              && !lookup_attribute ("no_caller_saved_registers",
                                    TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
    {
      static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
      bool is_64bit_ms_abi = (TARGET_64BIT
                              && ix86_function_abi (fndecl) == MS_ABI);
      char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);

      /* If there are no caller-saved registers, add all registers
         that are clobbered by the call which returns.  */
      for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
        if (!fixed_regs[i]
            && (ix86_call_used_regs[i] == 1
                || (ix86_call_used_regs[i] & c_mask))
            && !STACK_REGNO_P (i)
            && !MMX_REGNO_P (i))
          clobber_reg (&use,
                       gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
    }
  else if (TARGET_64BIT_MS_ABI
           && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
        {
          int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
          machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;

          clobber_reg (&use, gen_rtx_REG (mode, regno));
        }

      /* Set here, but it may get cleared later.  */
      if (TARGET_CALL_MS2SYSV_XLOGUES)
        {
          if (!TARGET_SSE)
            ;

          /* Don't break hot-patched functions.  */
          else if (ix86_function_ms_hook_prologue (current_function_decl))
            ;

          /* TODO: Cases not yet examined.  */
          else if (flag_split_stack)
            warn_once_call_ms2sysv_xlogues ("-fsplit-stack");

          else
            {
              gcc_assert (!reload_completed);
              cfun->machine->call_ms2sysv = true;
            }
        }
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
/* Return true if the function being called was marked with attribute
   "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   to handle the non-PIC case in the backend because there is no easy
   interface for the front-end to force non-PLT calls to use the GOT.
   This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   to call the function marked "noplt" indirectly.  */

static bool
ix86_nopic_noplt_attribute_p (rtx call_op)
{
  if (flag_pic || ix86_cmodel == CM_LARGE
      || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
      || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
      || SYMBOL_REF_LOCAL_P (call_op))
    return false;

  tree symbol_decl = SYMBOL_REF_DECL (call_op);

  if (!flag_plt
      || (symbol_decl != NULL_TREE
          && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
    return true;

  return false;
}
28100 ix86_output_call_insn (rtx_insn
*insn
, rtx call_op
)
28102 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
28103 bool seh_nop_p
= false;
28106 if (SIBLING_CALL_P (insn
))
28110 if (ix86_nopic_noplt_attribute_p (call_op
))
28113 xasm
= "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28115 xasm
= "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28118 xasm
= "%!jmp\t%P0";
28120 /* SEH epilogue detection requires the indirect branch case
28121 to include REX.W. */
28122 else if (TARGET_SEH
)
28123 xasm
= "%!rex.W jmp\t%A0";
28125 xasm
= "%!jmp\t%A0";
28127 output_asm_insn (xasm
, &call_op
);
28131 /* SEH unwinding can require an extra nop to be emitted in several
28132 circumstances. Determine if we have one of those. */
28137 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
28139 /* If we get to another real insn, we don't need the nop. */
28143 /* If we get to the epilogue note, prevent a catch region from
28144 being adjacent to the standard epilogue sequence. If non-
28145 call-exceptions, we'll have done this during epilogue emission. */
28146 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
28147 && !flag_non_call_exceptions
28148 && !can_throw_internal (insn
))
28155 /* If we didn't find a real insn following the call, prevent the
28156 unwinder from looking into the next function. */
28163 if (ix86_nopic_noplt_attribute_p (call_op
))
28166 xasm
= "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28168 xasm
= "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28171 xasm
= "%!call\t%P0";
28174 xasm
= "%!call\t%A0";
28176 output_asm_insn (xasm
, &call_op
);
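/* Illustrative sketch (not part of the original source): with -fno-plt on
   a non-PIC 64-bit ELF target, the templates above emit the indirect form

       call *foo@GOTPCREL(%rip)           (AT&T syntax)
       call QWORD PTR foo@GOTPCREL[rip]   (Intel syntax)

   rather than a direct "call foo" that would have to go through the PLT.  */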
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_cleared_alloc<machine_function> ();
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}
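/* Usage sketch (illustrative, not from the original source): a pass that
   needs a scratch stack slot can do

     rtx slot = assign_386_stack_local (SImode, SLOT_TEMP);

   and a second request with the same (mode, slot number) pair returns a
   copy of the same RTL instead of allocating a new slot.  */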
static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}
/* Return the number used for encoding REG, in the range 0..7.  */

static int
reg_encoded_number (rtx reg)
{
  unsigned regno = REGNO (reg);
  switch (regno)
    {
    case AX_REG:
      return 0;
    case CX_REG:
      return 1;
    case DX_REG:
      return 2;
    case BX_REG:
      return 3;
    case SP_REG:
      return 4;
    case BP_REG:
      return 5;
    case SI_REG:
      return 6;
    case DI_REG:
      return 7;
    default:
      break;
    }
  if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
    return regno - FIRST_STACK_REG;
  if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
    return regno - FIRST_SSE_REG;
  if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
    return regno - FIRST_MMX_REG;
  if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
    return regno - FIRST_REX_SSE_REG;
  if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
    return regno - FIRST_REX_INT_REG;
  if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
    return regno - FIRST_MASK_REG;
  if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
    return regno - FIRST_BND_REG;
  gcc_unreachable ();
}
/* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
   in its encoding if it could be relevant for ROP mitigation, otherwise
   return -1.  If POPNO0 and POPNO1 are nonnull, store the operand numbers
   used for calculating it into them.  */

static int
ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
			int *popno0 = 0, int *popno1 = 0)
{
  if (asm_noperands (PATTERN (insn)) >= 0)
    return -1;
  int has_modrm = get_attr_modrm (insn);
  if (!has_modrm)
    return -1;
  enum attr_modrm_class cls = get_attr_modrm_class (insn);
  rtx op0, op1;
  switch (cls)
    {
    case MODRM_CLASS_OP02:
      gcc_assert (noperands >= 3);
      if (popno0)
	{
	  *popno0 = 0;
	  *popno1 = 2;
	}
      op0 = operands[0];
      op1 = operands[2];
      break;
    case MODRM_CLASS_OP01:
      gcc_assert (noperands >= 2);
      if (popno0)
	{
	  *popno0 = 0;
	  *popno1 = 1;
	}
      op0 = operands[0];
      op1 = operands[1];
      break;
    default:
      return -1;
    }
  if (REG_P (op0) && REG_P (op1))
    {
      int enc0 = reg_encoded_number (op0);
      int enc1 = reg_encoded_number (op1);
      return 0xc0 + (enc1 << 3) + enc0;
    }
  return -1;
}
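/* Worked example (a sketch, not part of the original source): for a
   register-to-register "add %ecx, %eax", op0 is eax (encoding 0) and
   op1 is ecx (encoding 1), so the function returns
   0xc0 + (1 << 3) + 0 = 0xc8, which is the modr/m byte of the
   instruction's encoding.  */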
/* Check whether x86 address PARTS is a pc-relative address.  */

static bool
ix86_rip_relative_addr_p (struct ix86_address *parts)
{
  rtx base, index, disp;

  base = parts->base;
  index = parts->index;
  disp = parts->disp;

  if (disp && !base && !index)
    {
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) == LABEL_REF
	      || (GET_CODE (symbol) == SYMBOL_REF
		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
	      || (GET_CODE (symbol) == UNSPEC
		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
		      || XINT (symbol, 1) == UNSPEC_PCREL
		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
	    return true;
	}
    }
  return false;
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate an addr32 prefix for LEA insns.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not an LEA instruction, add the length of the addr32
     prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     a SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by an UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (&parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
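/* Worked examples (sketches, not part of the original source); the counts
   exclude the one-byte modrm itself:
     (%eax)       -> 0  (plain register indirect)
     (%esp)       -> 1  (SIB byte; esp as a base always wants an index)
     4(%ebp)      -> 1  (disp8; ebp as a base always wants a displacement)
     foo(,%eax,4) -> 5  (disp32 plus a SIB byte for the index)  */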
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */

int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
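/* Example (illustrative, not part of the original source): with SHORTFORM
   set, "addl $100, %eax" can use the sign-extended 8-bit immediate form,
   so the immediate contributes 1 byte; "addl $1000, %eax" falls outside
   [-128, 127] and needs the full 4-byte SImode immediate.  */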
/* Compute default value for "length_address" attribute.  */

int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}
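/* Example (illustrative, not part of the original source):
   "vaddps %xmm1, %xmm2, %xmm0" needs only the 0f opcode map and no
   REX.W/X/B bits, so it fits the 2-byte VEX prefix (2 + 1 = 3).  A DImode
   general register operand, or an extended register mentioned in a memory
   operand, forces the 3-byte prefix (3 + 1 = 4).  */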
static bool ix86_class_likely_spilled_p (reg_class_t);

/* Return true if the lhs of INSN is a HW function argument register, and
   set IS_SPILLED to true if it is a likely spilled HW register.  */
static bool
insn_is_function_arg (rtx insn, bool* is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore them.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it a likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
	*is_spilled = true;
      return true;
    }
  return false;
}
/* Add output dependencies for a chain of adjacent function arguments if
   there is a move to a likely spilled HW register.  Return the first
   argument if at least one dependence was added, or NULL otherwise.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find the argument passing instruction nearest to the call.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (last, &is_spilled))
	break;
      return NULL;
    }

  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, &is_spilled))
	{
	  /* Add output dependence between two function arguments if the
	     chain of output arguments contains likely spilled HW
	     registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
/* Add an output or anti dependency from INSN to FIRST_ARG to restrict
   its code motion.  */
static void
avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
{
  rtx set;
  rtx tmp;

  /* Add anti dependencies for bounds stores.  */
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL
      && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
      && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
    {
      add_dependence (first_arg, insn, REG_DEP_ANTI);
      return;
    }

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}
/* Avoid cross block motion of a function argument by adding a dependency
   from the first non-jump instruction in BB.  */
static void
add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
{
  rtx_insn *insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
	{
	  rtx set = single_set (insn);
	  if (set)
	    {
	      avoid_func_arg_motion (arg, insn);
	      return;
	    }
	}
      if (insn == BB_HEAD (bb))
	return;
      insn = PREV_INSN (insn);
    }
}
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (insn, head);
	if (first_arg)
	  {
	    /* Add dependee for the first argument to predecessors, but
	       only if the region contains more than one block.  */
	    basic_block bb = BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of the region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating loop-carried dependencies by
		       using the topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (first_arg, e->src);
		  }
	      }
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
/* Hook for pre-reload schedule - set priority of moves from likely spilled
   HW registers to maximum, to schedule them as soon as possible.  These are
   moves from function argument registers at the top of the function entry
   and moves from function return value registers after call.  */
static int
ix86_adjust_priority (rtx_insn *insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
	  && HARD_REGISTER_P (tmp)
	  && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
	return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for pre-reload schedule
	 to save compile time.  */
      if (reload_completed)
	{
	  ix86_core2i7_init_hooks ();
	  break;
	}
      /* Fall through.  */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Implement TARGET_STATIC_RTX_ALIGNMENT.  */

static HOST_WIDE_INT
ix86_static_rtx_alignment (machine_mode mode)
{
  if (mode == DFmode)
    return 64;
  if (ALIGN_MODE_128 (mode))
    return MAX (128, GET_MODE_ALIGNMENT (mode));
  return GET_MODE_ALIGNMENT (mode);
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
      HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
      return MAX (mode_align, align);
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Implement TARGET_EMPTY_RECORD_P.  */

static bool
ix86_is_empty_record (const_tree type)
{
  if (!TARGET_64BIT)
    return false;
  return default_is_empty_record (type);
}
/* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  */

static void
ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!cum->warn_empty)
    return;

  if (!TYPE_EMPTY_P (type))
    return;

  const_tree ctx = get_ultimate_context (cum->decl);
  if (ctx != NULL_TREE
      && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
    return;

  /* If the actual size of the type is zero, then there is no change
     in how objects of this size are passed.  */
  if (int_size_in_bytes (type) == 0)
    return;

  warning (OPT_Wabi, "empty class %qT parameter passing ABI "
	   "changes in -fabi-version=12 (GCC 8)", type);

  /* Only warn once.  */
  cum->warn_empty = false;
}
/* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
   the data type, and ALIGN is the alignment that the object would
   ordinarily have.  */

static int
iamcu_alignment (tree type, int align)
{
  machine_mode mode;

  if (align < 32 || TYPE_USER_ALIGN (type))
    return align;

  /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
     bytes.  */
  mode = TYPE_MODE (strip_array_types (type));
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_INT:
    case MODE_COMPLEX_INT:
    case MODE_COMPLEX_FLOAT:
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      return 32;
    default:
      return align;
    }
}
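/* Example (illustrative, not part of the original source): under the
   Intel MCU psABI a "double" or "long long" scalar, which would
   ordinarily be 64-bit aligned, is capped at 32-bit alignment by the
   switch above.  */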
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }

  if (!opt)
    return align;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
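/* Example (illustrative, not part of the original source): with a tuning
   whose prefetch_block is 64 bytes, max_align is 64 * 8 = 512 bits, so a
   static aggregate of 64 bytes or more is placed on a cache-line
   boundary; -malign-data=abi instead clears OPT and keeps the plain ABI
   alignment.  */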
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler can not do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit can not rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we can not benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun))
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    {
      gcc_checking_assert (!TARGET_STV);
      return 32;
    }

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
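/* Example (illustrative, not part of the original source): for a nested
   function

     int outer (void)
     {
       int x = 1;
       int inner (void) { return x; }
       return inner ();
     }

   compiled with the default 32-bit convention, the static chain pointing
   into OUTER's frame reaches INNER in %ecx; in 64-bit mode it is always
   passed in %r10.  */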
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
static bool
ix86_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !ix86_function_naked (current_function_decl);
}

static bool
ix86_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return !ix86_function_naked (decl);
}
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  Builtins not mentioned in any
   bdesc_* arrays below should come first, then builtins for each bdesc_*
   array in ascending order, so that we can use direct array accesses.  */
enum ix86_builtins
{
  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,
  IX86_BUILTIN_CLZERO,
  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,
  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,
  IX86_BUILTIN_VFMSUBSD3_MASK3,
  IX86_BUILTIN_VFMSUBSS3_MASK3,
  IX86_BUILTIN_GATHER3SIV8SF,
  IX86_BUILTIN_GATHER3SIV4SF,
  IX86_BUILTIN_GATHER3SIV4DF,
  IX86_BUILTIN_GATHER3SIV2DF,
  IX86_BUILTIN_GATHER3DIV8SF,
  IX86_BUILTIN_GATHER3DIV4SF,
  IX86_BUILTIN_GATHER3DIV4DF,
  IX86_BUILTIN_GATHER3DIV2DF,
  IX86_BUILTIN_GATHER3SIV8SI,
  IX86_BUILTIN_GATHER3SIV4SI,
  IX86_BUILTIN_GATHER3SIV4DI,
  IX86_BUILTIN_GATHER3SIV2DI,
  IX86_BUILTIN_GATHER3DIV8SI,
  IX86_BUILTIN_GATHER3DIV4SI,
  IX86_BUILTIN_GATHER3DIV4DI,
  IX86_BUILTIN_GATHER3DIV2DI,
  IX86_BUILTIN_SCATTERSIV8SF,
  IX86_BUILTIN_SCATTERSIV4SF,
  IX86_BUILTIN_SCATTERSIV4DF,
  IX86_BUILTIN_SCATTERSIV2DF,
  IX86_BUILTIN_SCATTERDIV8SF,
  IX86_BUILTIN_SCATTERDIV4SF,
  IX86_BUILTIN_SCATTERDIV4DF,
  IX86_BUILTIN_SCATTERDIV2DF,
  IX86_BUILTIN_SCATTERSIV8SI,
  IX86_BUILTIN_SCATTERSIV4SI,
  IX86_BUILTIN_SCATTERSIV4DI,
  IX86_BUILTIN_SCATTERSIV2DI,
  IX86_BUILTIN_SCATTERDIV8SI,
  IX86_BUILTIN_SCATTERDIV4SI,
  IX86_BUILTIN_SCATTERDIV4DI,
  IX86_BUILTIN_SCATTERDIV2DI,
  /* Alternate 4 and 8 element gather/scatter for the vectorizer
     where all operands are 32-byte or 64-byte wide respectively.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,
  IX86_BUILTIN_GATHER3ALTDIV16SF,
  IX86_BUILTIN_GATHER3ALTDIV16SI,
  IX86_BUILTIN_GATHER3ALTSIV4DF,
  IX86_BUILTIN_GATHER3ALTDIV8SF,
  IX86_BUILTIN_GATHER3ALTSIV4DI,
  IX86_BUILTIN_GATHER3ALTDIV8SI,
  IX86_BUILTIN_GATHER3ALTSIV8DF,
  IX86_BUILTIN_GATHER3ALTSIV8DI,
  IX86_BUILTIN_GATHER3DIV16SF,
  IX86_BUILTIN_GATHER3DIV16SI,
  IX86_BUILTIN_GATHER3DIV8DF,
  IX86_BUILTIN_GATHER3DIV8DI,
  IX86_BUILTIN_GATHER3SIV16SF,
  IX86_BUILTIN_GATHER3SIV16SI,
  IX86_BUILTIN_GATHER3SIV8DF,
  IX86_BUILTIN_GATHER3SIV8DI,
  IX86_BUILTIN_SCATTERALTSIV8DF,
  IX86_BUILTIN_SCATTERALTDIV16SF,
  IX86_BUILTIN_SCATTERALTSIV8DI,
  IX86_BUILTIN_SCATTERALTDIV16SI,
  IX86_BUILTIN_SCATTERDIV16SF,
  IX86_BUILTIN_SCATTERDIV16SI,
  IX86_BUILTIN_SCATTERDIV8DF,
  IX86_BUILTIN_SCATTERDIV8DI,
  IX86_BUILTIN_SCATTERSIV16SF,
  IX86_BUILTIN_SCATTERSIV16SI,
  IX86_BUILTIN_SCATTERSIV8DF,
  IX86_BUILTIN_SCATTERSIV8DI,
  IX86_BUILTIN_GATHERPFQPD,
  IX86_BUILTIN_GATHERPFDPS,
  IX86_BUILTIN_GATHERPFDPD,
  IX86_BUILTIN_GATHERPFQPS,
  IX86_BUILTIN_SCATTERPFDPD,
  IX86_BUILTIN_SCATTERPFDPS,
  IX86_BUILTIN_SCATTERPFQPD,
  IX86_BUILTIN_SCATTERPFQPS,

  IX86_BUILTIN_CLFLUSHOPT,
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_NANQ,
  IX86_BUILTIN_NANSQ,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_ADDCARRYX32,
  IX86_BUILTIN_ADDCARRYX64,
  IX86_BUILTIN_SBB32,
  IX86_BUILTIN_SBB64,
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,
  IX86_BUILTIN_RDSEED16_STEP,
  IX86_BUILTIN_RDSEED32_STEP,
  IX86_BUILTIN_RDSEED64_STEP,
  IX86_BUILTIN_MONITORX,
  IX86_BUILTIN_MWAITX,
  IX86_BUILTIN_CFSTRING,
  IX86_BUILTIN_CPU_INIT,
  IX86_BUILTIN_CPU_IS,
  IX86_BUILTIN_CPU_SUPPORTS,
  IX86_BUILTIN_READ_FLAGS,
  IX86_BUILTIN_WRITE_FLAGS,

  /* All the remaining builtins are tracked in bdesc_* arrays in
     i386-builtin.def.  Don't add any IX86_BUILTIN_* enumerators after
     this point.  */
#define BDESC(mask, icode, name, code, comparison, flag) \
  code,
#define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
  code,									    \
  IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
#define BDESC_END(kind, next_kind)

#include "i386-builtin.def"

#undef BDESC
#undef BDESC_FIRST
#undef BDESC_END

  IX86_BUILTIN_MAX,

  IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,

  /* Now just the aliases for bdesc_* start/end.  */
#define BDESC(mask, icode, name, code, comparison, flag)
#define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
#define BDESC_END(kind, next_kind) \
  IX86_BUILTIN__BDESC_##kind##_LAST					    \
    = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,

#include "i386-builtin.def"

#undef BDESC
#undef BDESC_FIRST
#undef BDESC_END

  /* Just to make sure there is no comma after the last enumerator.  */
  IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
  HOST_WIDE_INT isa2;		/* additional isa_flags this builtin is
				   defined for */
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  unsigned char const_p:1;	/* true if the declaration is constant */
  unsigned char pure_p:1;	/* true if the declaration has pure attribute */
  bool leaf_p;			/* true if the declaration has leaf attribute */
  bool nothrow_p;		/* true if the declaration has nothrow attribute */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Bits that can still enable any inclusion of a builtin.  */
static HOST_WIDE_INT deferred_isa_values = 0;
static HOST_WIDE_INT deferred_isa_values2 = 0;
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      /* OPTION_MASK_ISA_AVX512VL has special meaning.  Unlike the generic
	 case, where any set bit means the built-in is enabled, this bit must
	 be *and-ed* with another one.  E.g.:
	 OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL means that
	 *both* cpuid bits must be set for the built-in to be available.
	 Handle this here.  */
      if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
	mask &= ~OPTION_MASK_ISA_AVX512VL;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  /* Just a MASK where set_and_not_built_p == true can potentially
	     include a builtin.  */
	  deferred_isa_values |= mask;
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].leaf_p = false;
	  ix86_builtins_isa[(int) code].nothrow_p = false;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].pure_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
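/* Usage sketch with hypothetical arguments (not an actual builtin):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_foo",
		  V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_FOO);

   either registers the builtin immediately (ISA enabled, or the front
   end supports ext-scope builtins) or records it in ix86_builtins_isa so
   that ix86_add_new_builtins can materialize it later.  */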
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}

/* Like def_builtin, but also marks the function decl "pure".  */

static inline tree
def_builtin_pure (HOST_WIDE_INT mask, const char *name,
		  enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    DECL_PURE_P (decl) = 1;
  else
    ix86_builtins_isa[(int) code].pure_p = true;

  return decl;
}
/* Like def_builtin, but for additional isa2 flags.  */

static inline tree
def_builtin2 (HOST_WIDE_INT mask, const char *name,
	      enum ix86_builtin_func_type tcode,
	      enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  ix86_builtins_isa[(int) code].isa2 = mask;

  if (mask == 0
      || (mask & ix86_isa_flags2) != 0
      || (lang_hooks.builtin_function
	  == lang_hooks.builtin_function_ext_scope))
    {
      tree type = ix86_get_builtin_func_type (tcode);
      decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ix86_builtins[(int) code] = decl;
      ix86_builtins_isa[(int) code].set_and_not_built_p = false;
    }
  else
    {
      /* Just a MASK where set_and_not_built_p == true can potentially
	 include a builtin.  */
      deferred_isa_values2 |= mask;
      ix86_builtins[(int) code] = NULL_TREE;
      ix86_builtins_isa[(int) code].tcode = tcode;
      ix86_builtins_isa[(int) code].name = name;
      ix86_builtins_isa[(int) code].leaf_p = false;
      ix86_builtins_isa[(int) code].nothrow_p = false;
      ix86_builtins_isa[(int) code].const_p = false;
      ix86_builtins_isa[(int) code].pure_p = false;
      ix86_builtins_isa[(int) code].set_and_not_built_p = true;
    }

  return decl;
}
/* Like def_builtin2, but also marks the function decl "const".  */

static inline tree
def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
		    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin2 (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}

/* Like def_builtin2, but also marks the function decl "pure".  */

static inline tree
def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin2 (mask, name, tcode, code);
  if (decl)
    DECL_PURE_P (decl) = 1;
  else
    ix86_builtins_isa[(int) code].pure_p = true;

  return decl;
}
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
{
  int i;

  if ((isa & deferred_isa_values) == 0
      && (isa2 & deferred_isa_values2) == 0)
    return;

  /* Bits in ISA value can be removed from potential isa values.  */
  deferred_isa_values &= ~isa;
  deferred_isa_values2 &= ~isa2;

  tree saved_current_target_pragma = current_target_pragma;
  current_target_pragma = NULL_TREE;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if (((ix86_builtins_isa[i].isa & isa) != 0
	   || (ix86_builtins_isa[i].isa2 & isa2) != 0)
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	  if (ix86_builtins_isa[i].pure_p)
	    DECL_PURE_P (decl) = 1;
	  if (ix86_builtins_isa[i].leaf_p)
	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
						      NULL_TREE);
	  if (ix86_builtins_isa[i].nothrow_p)
	    TREE_NOTHROW (decl) = 1;
	}
    }

  current_target_pragma = saved_current_target_pragma;
}
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
#define BDESC(mask, icode, name, code, comparison, flag) \
  { mask, icode, name, code, comparison, flag },
#define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
static const struct builtin_description bdesc_##kind[] =		    \
{									    \
  BDESC (mask, icode, name, code, comparison, flag)
#define BDESC_END(kind, next_kind) \
};

#include "i386-builtin.def"

#undef BDESC
#undef BDESC_FIRST
#undef BDESC_END
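/* Expansion sketch (hypothetical entry, not from i386-builtin.def): a line

     BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_foo, "__builtin_ia32_foo",
	    IX86_BUILTIN_FOO, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)

   becomes one initializer

     { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_foo, "__builtin_ia32_foo",
       IX86_BUILTIN_FOO, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

   inside the bdesc_* array opened by the enclosing BDESC_FIRST.  */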
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' cause
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function () will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
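
/* For example (an illustrative sketch): because the second name passed to
   add_builtin_function above drops the "__builtin_" prefix, the descriptor
   for "__builtin__ITM_RM256" also makes the builtin callable directly under
   its libitm name, roughly:

     extern __m256 _ITM_RM256 (const __m256 *);   // per V8SF_FTYPE_PCV8SF
     __m256 v = _ITM_RM256 (src);
*/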
/* Macros for verification of enum ix86_builtins order.  */
#define BDESC_VERIFY(x, y, z) \
  gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
#define BDESC_VERIFYS(x, y, z) \
  STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))

BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
	       IX86_BUILTIN__BDESC_COMI_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
	       IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
	       IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
	       IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
	       IX86_BUILTIN__BDESC_ARGS_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
	       IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
	       IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
	       IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
	       IX86_BUILTIN__BDESC_MPX_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
	       IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
	       IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
	       IX86_BUILTIN__BDESC_CET_LAST, 1);
BDESC_VERIFYS (IX86_BUILTIN_MAX,
	       IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
		 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
		 ARRAY_SIZE (bdesc_special_args) - 1);

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
		 IX86_BUILTIN__BDESC_ARGS_FIRST,
		 ARRAY_SIZE (bdesc_args) - 1);

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args2;
       i < ARRAY_SIZE (bdesc_args2);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS2_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const2 (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_LAST,
		 IX86_BUILTIN__BDESC_ARGS2_FIRST,
		 ARRAY_SIZE (bdesc_args2) - 1);

  for (i = 0, d = bdesc_special_args2;
       i < ARRAY_SIZE (bdesc_special_args2);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin2 (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST,
		 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
		 ARRAY_SIZE (bdesc_special_args2) - 1);

  /* Add all builtins with rounding.  */
  for (i = 0, d = bdesc_round_args;
       i < ARRAY_SIZE (bdesc_round_args);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
		 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
		 ARRAY_SIZE (bdesc_round_args) - 1);
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
		 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
		 ARRAY_SIZE (bdesc_pcmpestr) - 1);

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
		 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
		 ARRAY_SIZE (bdesc_pcmpistr) - 1);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
		 IX86_BUILTIN__BDESC_COMI_FIRST,
		 ARRAY_SIZE (bdesc_comi) - 1);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
		    UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
	       /* As it uses V4HImode, we have to require -mmmx too.  */
	       | OPTION_MASK_ISA_MMX,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
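
/* Illustrative usage (a sketch): each rdrand "step" builtin stores one
   random value through its pointer operand and returns nonzero on
   hardware success, per the INT_FTYPE_PUNSIGNED signature registered
   above:

     unsigned int r;
     if (__builtin_ia32_rdrand32_step (&r))
       consume (r);   // 'consume' is a placeholder for caller code
*/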
  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
		    V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
		    IX86_BUILTIN_GATHERSIV2DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
		    V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
		    IX86_BUILTIN_GATHERSIV4DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
		    V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
		    IX86_BUILTIN_GATHERDIV2DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
		    V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
		    IX86_BUILTIN_GATHERDIV4DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
		    V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
		    IX86_BUILTIN_GATHERSIV4SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
		    V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
		    IX86_BUILTIN_GATHERSIV8SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
		    V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
		    IX86_BUILTIN_GATHERDIV4SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
		    V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
		    IX86_BUILTIN_GATHERDIV8SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
		    V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
		    IX86_BUILTIN_GATHERSIV2DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
		    V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
		    IX86_BUILTIN_GATHERSIV4DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
		    V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
		    IX86_BUILTIN_GATHERDIV2DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
		    V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
		    IX86_BUILTIN_GATHERDIV4DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
		    V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
		    IX86_BUILTIN_GATHERSIV4SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
		    V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
		    IX86_BUILTIN_GATHERSIV8SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
		    V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
		    IX86_BUILTIN_GATHERDIV4SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
		    V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
		    IX86_BUILTIN_GATHERDIV8SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
		    V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
		    IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
		    V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
		    IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
		    V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
		    IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
		    V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
		    IX86_BUILTIN_GATHERALTDIV8SI);
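
/* For reference: these AVX2 gather builtins back the user-level gather
   intrinsics; e.g. _mm_i32gather_ps in avx2intrin.h is assumed to expand
   to __builtin_ia32_gathersiv4sf with an all-ones mask and the scale as
   the trailing INT operand (an assumption about the header mapping, which
   is not restated in this file).  */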
  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
		    V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
		    IX86_BUILTIN_GATHER3SIV16SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
		    V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV8DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
		    V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV16SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
		    V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV8DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
		    V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
		    IX86_BUILTIN_GATHER3SIV16SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
		    V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV8DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
		    V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV16SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
		    V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV8DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
		    V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
		    IX86_BUILTIN_GATHER3ALTSIV8DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
		    V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
		    IX86_BUILTIN_GATHER3ALTDIV16SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
		    V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
		    IX86_BUILTIN_GATHER3ALTSIV8DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
		    V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
		    IX86_BUILTIN_GATHER3ALTDIV16SI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
	       VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
	       IX86_BUILTIN_SCATTERSIV16SF);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
	       VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
	       IX86_BUILTIN_SCATTERSIV8DF);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
	       VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
	       IX86_BUILTIN_SCATTERDIV16SF);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
	       VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
	       IX86_BUILTIN_SCATTERDIV8DF);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
	       VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
	       IX86_BUILTIN_SCATTERSIV16SI);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
	       VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
	       IX86_BUILTIN_SCATTERSIV8DI);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
	       VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
	       IX86_BUILTIN_SCATTERDIV16SI);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
	       VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
	       IX86_BUILTIN_SCATTERDIV8DI);
  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
		    V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV2DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
		    V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV4DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
		    V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV2DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
		    V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV4DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
		    V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV4SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
		    V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV8SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
		    V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV4SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
		    V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV8SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
		    V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV2DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
		    V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV4DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
		    V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV2DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
		    V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV4DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
		    V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV4SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
		    V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
		    IX86_BUILTIN_GATHER3SIV8SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
		    V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV4SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
		    V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
		    IX86_BUILTIN_GATHER3DIV8SI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
		    V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
		    IX86_BUILTIN_GATHER3ALTSIV4DF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
		    V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
		    IX86_BUILTIN_GATHER3ALTDIV8SF);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
		    V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
		    IX86_BUILTIN_GATHER3ALTSIV4DI);

  def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
		    V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
		    IX86_BUILTIN_GATHER3ALTDIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
	       VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
	       IX86_BUILTIN_SCATTERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
	       VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
	       IX86_BUILTIN_SCATTERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
	       VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
	       IX86_BUILTIN_SCATTERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
	       VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
	       IX86_BUILTIN_SCATTERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
	       VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
	       IX86_BUILTIN_SCATTERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
	       VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
	       IX86_BUILTIN_SCATTERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
	       VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
	       IX86_BUILTIN_SCATTERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
	       VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
	       IX86_BUILTIN_SCATTERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
	       VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
	       IX86_BUILTIN_SCATTERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
	       VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
	       IX86_BUILTIN_SCATTERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
	       VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
	       IX86_BUILTIN_SCATTERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
	       VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
	       IX86_BUILTIN_SCATTERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
	       VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
	       IX86_BUILTIN_SCATTERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
	       VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
	       IX86_BUILTIN_SCATTERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
	       VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
	       IX86_BUILTIN_SCATTERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
	       VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
	       IX86_BUILTIN_SCATTERDIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
	       VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
	       IX86_BUILTIN_SCATTERALTSIV8DF);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
	       VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
	       IX86_BUILTIN_SCATTERALTDIV16SF);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
	       VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
	       IX86_BUILTIN_SCATTERALTSIV8DI);

  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
	       VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
	       IX86_BUILTIN_SCATTERALTDIV16SI);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
	       VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
	       IX86_BUILTIN_GATHERPFDPD);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
	       VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
	       IX86_BUILTIN_GATHERPFDPS);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
	       VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
	       IX86_BUILTIN_GATHERPFQPD);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
	       VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
	       IX86_BUILTIN_GATHERPFQPS);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
	       VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
	       IX86_BUILTIN_SCATTERPFDPD);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
	       VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
	       IX86_BUILTIN_SCATTERPFDPS);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
	       VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
	       IX86_BUILTIN_SCATTERPFQPD);
  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
	       VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
	       IX86_BUILTIN_SCATTERPFQPS);
  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
		     V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
		     V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);
  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
		     /* As it uses V4HImode, we have to require -mmmx too.  */
		     | OPTION_MASK_ISA_MMX,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
		     /* As it uses V4HImode, we have to require -mmmx too.  */
		     | OPTION_MASK_ISA_MMX,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdseed_di_step",
	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
  def_builtin (0, "__builtin_ia32_addcarryx_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_addcarryx_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_ADDCARRYX64);

  def_builtin (0, "__builtin_ia32_sbb_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_sbb_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_SBB64);
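
/* Illustrative usage (a sketch): per UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED,
   the add-with-carry builtins take a carry-in, two operands and a pointer
   for the result, and return the carry-out:

     unsigned int sum;
     unsigned char carry = __builtin_ia32_addcarryx_u32 (0, a, b, &sum);
*/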
  /* Read/write FLAGS.  */
  def_builtin (0, "__builtin_ia32_readeflags_u32",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
  def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
	       UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
  def_builtin (0, "__builtin_ia32_writeeflags_u32",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
  def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
	       VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
  def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);

  def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);

  /* MONITORX and MWAITX.  */
  def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
  def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
	       VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);

  def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
		 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
		 ARRAY_SIZE (bdesc_multi_arg) - 1);

  /* Add CET intrinsics.  */
  for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin2 (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
		 IX86_BUILTIN__BDESC_CET_FIRST,
		 ARRAY_SIZE (bdesc_cet) - 1);

  for (i = 0, d = bdesc_cet_rdssp;
       i < ARRAY_SIZE (bdesc_cet_rdssp);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin2 (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
		 IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
		 ARRAY_SIZE (bdesc_cet_rdssp) - 1);
}
static void
ix86_init_mpx_builtins ()
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  tree decl;
  size_t i;

  for (i = 0, d = bdesc_mpx;
       i < ARRAY_SIZE (bdesc_mpx);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      decl = def_builtin2 (d->mask, d->name, ftype, d->code);

      /* With no leaf and nothrow flags for MPX builtins,
	 abnormal edges may follow their calls when setjmp
	 is present in the function.  Since we may have a lot
	 of MPX builtin calls, this causes lots of useless
	 edges and enormous PHI nodes.  To avoid this we mark
	 MPX builtins as leaf and nothrow.  */
      if (decl)
	{
	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
						    NULL_TREE);
	  TREE_NOTHROW (decl) = 1;
	}
      else
	{
	  ix86_builtins_isa[(int)d->code].leaf_p = true;
	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
	}
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
		 IX86_BUILTIN__BDESC_MPX_FIRST,
		 ARRAY_SIZE (bdesc_mpx) - 1);

  for (i = 0, d = bdesc_mpx_const;
       i < ARRAY_SIZE (bdesc_mpx_const);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      decl = def_builtin_const2 (d->mask, d->name, ftype, d->code);

      if (decl)
	{
	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
						    NULL_TREE);
	  TREE_NOTHROW (decl) = 1;
	}
      else
	{
	  ix86_builtins_isa[(int)d->code].leaf_p = true;
	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
	}
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
		 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
		 ARRAY_SIZE (bdesc_mpx_const) - 1);
}
#undef BDESC_VERIFY
#undef BDESC_VERIFYS
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}
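
/* For illustration (a sketch of the code built above): for a version
   guarded by two predicates P1 (A1) and P2 (A2), the statements appended
   to NEW_BB behave roughly like

     cond1 = P1 (A1);
     cond2 = P2 (A2);
     and_var = MIN_EXPR <cond2, cond1>;   // zero iff any predicate failed
     if (and_var > 0)
       return (void *) &version_fn;       // split into bb2, then to EXIT
     // the false edge falls through to bb3, where the next condition goes
*/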
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
{
  tree attrs;
  struct cl_target_option cur_target;
  tree target_node;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;
  char *token;
  unsigned int i;

  /* Priority of i386 features, greater value is higher priority.  This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority
  {
    P_ZERO = 0,
    /* The remaining priority enumerators are elided in this excerpt.  */
  };

  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.  */
  static struct _feature_list
  {
    const char *const name;
    const enum feature_priority priority;
  }
  const feature_list[] =
  {
    /* Entries before "sse4a" are elided in this excerpt.  */
    {"sse4a", P_SSE4_A},
    {"ssse3", P_SSSE3},
    {"sse4.1", P_SSE4_1},
    {"sse4.2", P_SSE4_2},
    {"popcnt", P_POPCNT},
    {"pclmul", P_PCLMUL},
    {"avx512f", P_AVX512F}
  };

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  */
  if (strcmp (attrs_str, "default") == 0)
    return 0;

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version.  */
  if (strstr (attrs_str, "arch=") != NULL)
    {
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
						      &global_options_set);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
	{
	  switch (new_target->arch)
	    {
	    case PROCESSOR_CORE2:
	      arg_str = "core2";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_NEHALEM:
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
		{
		  arg_str = "westmere";
		}
	      else
		{
		  /* We translate "arch=corei7" and "arch=nehalem" to
		     "corei7" so that it will be mapped to M_INTEL_COREI7
		     as cpu type to cover all M_INTEL_COREI7_XXXs.  */
		  arg_str = "corei7";
		  priority = P_PROC_SSE4_2;
		}
	      break;
	    case PROCESSOR_SANDYBRIDGE:
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
		arg_str = "ivybridge";
	      else
		arg_str = "sandybridge";
	      priority = P_PROC_AVX;
	      break;
	    case PROCESSOR_HASWELL:
	    case PROCESSOR_SKYLAKE_AVX512:
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VBMI)
		arg_str = "cannonlake";
	      else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
		arg_str = "skylake-avx512";
	      else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
		arg_str = "skylake";
	      else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
		arg_str = "broadwell";
	      else
		arg_str = "haswell";
	      priority = P_PROC_AVX2;
	      break;
	    case PROCESSOR_BONNELL:
	      arg_str = "bonnell";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_KNL:
	      arg_str = "knl";
	      priority = P_PROC_AVX512F;
	      break;
	    case PROCESSOR_KNM:
	      arg_str = "knm";
	      priority = P_PROC_AVX512F;
	      break;
	    case PROCESSOR_SILVERMONT:
	      arg_str = "silvermont";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_AMDFAM10:
	      arg_str = "amdfam10h";
	      priority = P_PROC_SSE4_A;
	      break;
	    case PROCESSOR_BTVER1:
	      arg_str = "btver1";
	      priority = P_PROC_SSE4_A;
	      break;
	    case PROCESSOR_BTVER2:
	      arg_str = "btver2";
	      priority = P_PROC_BMI;
	      break;
	    case PROCESSOR_BDVER1:
	      arg_str = "bdver1";
	      priority = P_PROC_XOP;
	      break;
	    case PROCESSOR_BDVER2:
	      arg_str = "bdver2";
	      priority = P_PROC_FMA;
	      break;
	    case PROCESSOR_BDVER3:
	      arg_str = "bdver3";
	      priority = P_PROC_FMA;
	      break;
	    case PROCESSOR_BDVER4:
	      arg_str = "bdver4";
	      priority = P_PROC_AVX2;
	      break;
	    case PROCESSOR_ZNVER1:
	      arg_str = "znver1";
	      priority = P_PROC_AVX2;
	      break;
	    }
	}

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");
	  return 0;
	}

      if (predicate_list)
	{
	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
	  /* For a C string literal the length includes the trailing NULL.  */
	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
				       predicate_chain);
	}
    }

  /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
    {
      /* Do not process "arch="  */
      if (strncmp (token, "arch=", 5) == 0)
	{
	  token = strtok (NULL, ",");
	  continue;
	}
      for (i = 0; i < NUM_FEATURES; ++i)
	{
	  if (strcmp (token, feature_list[i].name) == 0)
	    {
	      if (predicate_list)
		{
		  predicate_arg = build_string_literal (
				  strlen (feature_list[i].name) + 1,
				  feature_list[i].name);
		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
					       predicate_chain);
		}
	      /* Find the maximum priority feature.  */
	      if (feature_list[i].priority > priority)
		priority = feature_list[i].priority;

	      break;
	    }
	}
      if (predicate_list && i == NUM_FEATURES)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for %s", token);
	  return 0;
	}
      token = strtok (NULL, ",");
    }
  free (tok_str);

  if (predicate_list && predicate_chain == NULL_TREE)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes : %s",
		attrs_str);
      return 0;
    }
  else if (predicate_list)
    {
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
    }

  return priority;
}
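
/* For example (an illustrative sketch): a version declared as

     __attribute__ ((target ("arch=haswell,popcnt")))
     int foo (void);

   produces a predicate chain that tests __builtin_cpu_is ("haswell") and
   __builtin_cpu_supports ("popcnt"), and the returned priority is the
   maximum of the arch priority (P_PROC_AVX2 here) and the priorities of
   the listed features.  */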
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */

static int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}

/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt = gimple_build_call_vec (
		     ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
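
/* For illustration (a sketch): after the qsort above, the resolver body
   assembled here behaves roughly like

     void *resolver (void)
     {
       __builtin_cpu_init ();
       if (<predicates of the highest-priority version hold>)
	 return &foo_high;
       if (<predicates of the next version hold>)
	 return &foo_next;
       return &foo_default;   // default version dispatched last
     }

   where foo_high, foo_next and foo_default are placeholder names.  */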
/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "Function versions cannot be marked as gnu_inline,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("Virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}

static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}
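
/* For example, given the sprintf above, a version of foo declared with
   __attribute__ ((target ("sse4.2"))) is assembled as "foo.sse4.2",
   while the default version keeps the plain name "foo".  */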
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs ifunc which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is
   ifunc alias that will point to the created resolver.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    {
      char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
      symtab->change_decl_assembler_name (ifunc_alias_decl,
					  get_identifier (ifunc_name));
      XDELETEVEC (ifunc_name);
    }

  resolver_name = make_unique_name (default_decl, "resolver", false);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  XDELETEVEC (resolver_name);
  return decl;
}
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}

/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
			      "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  unsigned int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
			  get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
	DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
		      get_identifier (field_name[3]),
		      build_array_type (unsigned_type_node,
					build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
  return type;
}
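
/* For reference, the layout built above is meant to mirror the libgcc
   definition; a sketch of it (see libgcc/config/i386/cpuinfo.c for the
   authoritative version) is:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */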

/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
			 VAR_DECL,
			 get_identifier(name),
			 type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}

/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is
   folded into an integer defined in libgcc/config/i386/cpuinfo.c  */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
				DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c  */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_SSE4_A,
    F_FMA4,
    F_XOP,
    F_FMA,
    F_AVX512F,
    F_BMI,
    F_BMI2,
    F_AES,
    F_PCLMUL,
    F_AVX512VL,
    F_AVX512BW,
    F_AVX512DQ,
    F_AVX512CD,
    F_AVX512ER,
    F_AVX512PF,
    F_AVX512VBMI,
    F_AVX512IFMA,
    F_AVX5124VNNIW,
    F_AVX5124FMAPS,
    F_AVX512VPOPCNTDQ,
    F_MAX
  };

  /* These are the values for vendor types, cpu types and subtypes in
     cpuinfo.c.  Cpu types and subtypes should have the corresponding
     start value subtracted.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_BONNELL,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_INTEL_SILVERMONT,
    M_INTEL_KNL,
    M_AMD_BTVER1,
    M_AMD_BTVER2,
    M_AMDFAM17H,
    M_INTEL_KNM,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2,
    M_AMDFAM15H_BDVER3,
    M_AMDFAM15H_BDVER4,
    M_AMDFAM17H_ZNVER1,
    M_INTEL_COREI7_IVYBRIDGE,
    M_INTEL_COREI7_HASWELL,
    M_INTEL_COREI7_BROADWELL,
    M_INTEL_COREI7_SKYLAKE,
    M_INTEL_COREI7_SKYLAKE_AVX512,
    M_INTEL_COREI7_CANNONLAKE
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_BONNELL},
      {"slm", M_INTEL_SILVERMONT},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
      {"haswell", M_INTEL_COREI7_HASWELL},
      {"broadwell", M_INTEL_COREI7_BROADWELL},
      {"skylake", M_INTEL_COREI7_SKYLAKE},
      {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
      {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
      {"bonnell", M_INTEL_BONNELL},
      {"silvermont", M_INTEL_SILVERMONT},
      {"knl", M_INTEL_KNL},
      {"knm", M_INTEL_KNM},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"btver1", M_AMD_BTVER1},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
      {"bdver3", M_AMDFAM15H_BDVER3},
      {"bdver4", M_AMDFAM15H_BDVER4},
      {"btver2", M_AMD_BTVER2},
      {"amdfam17h", M_AMDFAM17H},
      {"znver1", M_AMDFAM17H_ZNVER1},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov", F_CMOV},
      {"mmx", F_MMX},
      {"popcnt", F_POPCNT},
      {"sse", F_SSE},
      {"sse2", F_SSE2},
      {"sse3", F_SSE3},
      {"ssse3", F_SSSE3},
      {"sse4a", F_SSE4_A},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx", F_AVX},
      {"fma4", F_FMA4},
      {"xop", F_XOP},
      {"fma", F_FMA},
      {"avx2", F_AVX2},
      {"avx512f", F_AVX512F},
      {"bmi", F_BMI},
      {"bmi2", F_BMI2},
      {"aes", F_AES},
      {"pclmul", F_PCLMUL},
      {"avx512vl",F_AVX512VL},
      {"avx512bw",F_AVX512BW},
      {"avx512dq",F_AVX512DQ},
      {"avx512cd",F_AVX512CD},
      {"avx512er",F_AVX512ER},
      {"avx512pf",F_AVX512PF},
      {"avx512vbmi",F_AVX512VBMI},
      {"avx512ifma",F_AVX512IFMA},
      {"avx5124vnniw",F_AVX5124VNNIW},
      {"avx5124fmaps",F_AVX5124FMAPS},
      {"avx512vpopcntdq",F_AVX512VPOPCNTDQ}
    };

  tree __processor_model_type = build_processor_model_struct ();
  tree __cpu_model_var = make_var_decl (__processor_model_type,
					"__cpu_model");

  varpool_node::add (__cpu_model_var);

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
	 && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRs leading to a
	 STRING_CST.  */
      if (!EXPR_P (param_string_cst))
	{
	  error ("Parameter to builtin must be a string constant or literal");
	  return integer_zero_node;
	}
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
	if (strcmp (arch_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ARCH_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
	  && field_val < M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (field);
	  field_val -= M_CPU_TYPE_START;
	}

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (DECL_CHAIN (field));
	  field_val -= M_CPU_SUBTYPE_START;
	}

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Check the value.  */
      final = build2 (EQ_EXPR, unsigned_type_node, ref,
		      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree array_elt;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
	if (strcmp (isa_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ISA_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
	field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
			  integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
		      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  gcc_unreachable ();
}
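
/* As an illustration (user code, not part of GCC), a call such as
   __builtin_cpu_is ("haswell") folds into a comparison of
   __cpu_model.__cpu_subtype with
   M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START, while
   __builtin_cpu_supports ("avx2") folds into the bit test
   __cpu_model.__cpu_features[0] & (1 << F_AVX2).  */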

static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
				   DECL_FUNCTION_CODE (fndecl);
      switch (fn_code)
	{
	case IX86_BUILTIN_CPU_IS:
	case IX86_BUILTIN_CPU_SUPPORTS:
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);

	case IX86_BUILTIN_NANQ:
	case IX86_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IX86_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    break;
	  }

	case IX86_BUILTIN_INFQ:
	case IX86_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }

	case IX86_BUILTIN_TZCNT16:
	case IX86_BUILTIN_CTZS:
	case IX86_BUILTIN_TZCNT32:
	case IX86_BUILTIN_TZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_TZCNT16
		  || fn_code == IX86_BUILTIN_CTZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));

	      return fold_const_call (CFN_CTZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_LZCNT16:
	case IX86_BUILTIN_CLZS:
	case IX86_BUILTIN_LZCNT32:
	case IX86_BUILTIN_LZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_LZCNT16
		  || fn_code == IX86_BUILTIN_CLZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));

	      return fold_const_call (CFN_CLZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_BEXTR32:
	case IX86_BUILTIN_BEXTR64:
	case IX86_BUILTIN_BEXTRI32:
	case IX86_BUILTIN_BEXTRI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
	      unsigned int start = tree_to_uhwi (args[1]);
	      unsigned int len = (start & 0xff00) >> 8;
	      start &= 0xff;
	      if (start >= prec || len == 0)
		res = 0;
	      else if (!tree_fits_uhwi_p (args[0]))
		break;
	      else
		res = tree_to_uhwi (args[0]) >> start;
	      if (start + len > prec)
		len = prec - start;
	      if (len < HOST_BITS_PER_WIDE_INT)
		res &= (HOST_WIDE_INT_1U << len) - 1;
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_BZHI32:
	case IX86_BUILTIN_BZHI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
		return args[0];
	      if (!tree_fits_uhwi_p (args[0]))
		break;
	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
	      res &= ~(HOST_WIDE_INT_M1U << idx);
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PDEP32:
	case IX86_BUILTIN_PDEP64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & k) != 0)
		      res |= m;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PEXT32:
	case IX86_BUILTIN_PEXT64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & m) != 0)
		      res |= k;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	default:
	  break;
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
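
/* Illustrative effect of the folding above (assuming the usual builtin
   names): __builtin_ia32_tzcnt_u32 (8) folds to the constant 3 via
   CFN_CTZ, and __builtin_ia32_tzcnt_u32 (0) folds to 32, matching the
   TZCNT semantics for a zero input.  */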

/* Fold a MD builtin (use ix86_fold_builtin for folding into
   constant) in GIMPLE.  */

bool
ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
  int n_args = gimple_call_num_args (stmt);
  enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
  tree decl = NULL_TREE;
  tree arg0, arg1;

  switch (fn_code)
    {
    case IX86_BUILTIN_TZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CTZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_TZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CLZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
      goto fold_tzcnt_lzcnt;

    fold_tzcnt_lzcnt:
      gcc_assert (n_args == 1);
      arg0 = gimple_call_arg (stmt, 0);
      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
	{
	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
	  /* If arg0 is provably non-zero, optimize into generic
	     __builtin_c[tl]z{,ll} function the middle-end handles
	     better.  */
	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
	    return false;

	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_call (decl, 1, arg0);
	  gimple_set_location (g, loc);
	  tree lhs = make_ssa_name (integer_type_node);
	  gimple_call_set_lhs (g, lhs);
	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    case IX86_BUILTIN_BZHI32:
    case IX86_BUILTIN_BZHI64:
      gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
	{
	  unsigned int idx = tree_to_uhwi (arg1) & 0xff;
	  arg0 = gimple_call_arg (stmt, 0);
	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
	    break;
	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    case IX86_BUILTIN_PDEP32:
    case IX86_BUILTIN_PDEP64:
    case IX86_BUILTIN_PEXT32:
    case IX86_BUILTIN_PEXT64:
      gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
	{
	  location_t loc = gimple_location (stmt);
	  arg0 = gimple_call_arg (stmt, 0);
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    default:
      break;
    }

  return false;
}
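
/* Illustrative GIMPLE-level effect (a sketch, assuming X is provably
   non-zero at this point):

     y = __builtin_ia32_tzcnt_u32 (x);

   is rewritten as

     tmp = __builtin_ctz (x);
     y = (int) tmp;

   so later passes can use the middle end's knowledge of generic CTZ.  */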

/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
		       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
			       NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}

/* Make builtins to get CPU type and features supported.  The created
   builtins are :

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
			 INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
			 INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
			 INT_FTYPE_PCCHAR, true);
}
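
/* Typical use of the builtins created here (user code, not GCC):

     if (__builtin_cpu_supports ("avx2") && __builtin_cpu_is ("intel"))
       use_the_avx2_path ();

   Both calls fold into cheap tests of the __cpu_model variable that
   libgcc initializes at startup.  */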

/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
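
/* Sketch of the user-level code these builtins back (illustrative): on a
   64-bit target, a function declared with the other ABI's calling
   convention, e.g.

     __attribute__ ((sysv_abi)) int f (int n, ...)
     {
       __builtin_va_list ap;
       __builtin_va_start (ap, n);
       ...
     }

   has its va_start/va_end/va_copy routed to the matching
   __builtin_sysv_va_* (or __builtin_ms_va_*) variants created above.  */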

static void
ix86_init_builtin_types (void)
{
  tree float80_type_node, const_string_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      if (float64x_type_node != NULL_TREE
	  && TYPE_MODE (float64x_type_node) == XFmode)
	float80_type_node = float64x_type_node;
      else
	{
	  /* The __float80 type.  */
	  float80_type_node = make_node (REAL_TYPE);

	  TYPE_PRECISION (float80_type_node) = 80;
	  layout_type (float80_type_node);
	}
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  The node has already been created as
     _Float128, so we only need to register the __float128 name for
     it.  */
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  const_string_type_node
    = build_pointer_type (build_qualified_type
			  (char_type_node, TYPE_QUAL_CONST));

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
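
/* After this registration, user code (not GCC itself) can write e.g.

     __float80 x = 1.0L;
     __float128 y = 1.0;

   with __float80 having the 80-bit XFmode layout chosen above.  */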

static void
ix86_init_builtins (void)
{
  tree ftype, decl;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
  decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
			       BUILT_IN_MD, "nanq", NULL_TREE);
  TREE_READONLY (decl) = 1;
  ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;

  decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
			       BUILT_IN_MD, "nansq", NULL_TREE);
  TREE_READONLY (decl) = 1;
  ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
			       BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (decl) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;

  ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  decl = add_builtin_function ("__builtin_copysignq", ftype,
			       IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
			       "__copysigntf3", NULL_TREE);
  TREE_READONLY (decl) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();
  ix86_init_mpx_builtins ();

  if (TARGET_LP64)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Fixup modeless constants to fit required mode.  */

static rtx
fixup_modeless_constant (rtx x, machine_mode mode)
{
  if (GET_MODE (x) == VOIDmode)
    x = convert_to_mode (mode, x, 1);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
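
/* As an example of the path above (illustrative), a two-operand builtin
   such as __builtin_ia32_addps expands by loading both call arguments,
   coercing them into operands that satisfy the insn predicates, and
   emitting the single matching pattern into TARGET.  */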

/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    machine_mode mode;
  } args[4];

  machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);
  else if (memory_operand (target, tmode))
    num_memory++;

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
			     args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
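
/* For instance (illustrative), the XOP vpermil2 builtins are four-operand
   cases with last_arg_constant set; the CODE_FOR_xop_vpermil2v4sf3 arm
   above rejects anything but a 2-bit immediate in the final position.  */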

/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  machine_mode tmode = insn_data[d->icode].operand[0].mode;
  machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    std::swap (op0, op1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    std::swap (op0, op1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
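
/* Illustrative mapping (not exhaustive): the builtin behind
   _mm_comieq_ss lands here; the COMISS-style pattern sets the flags and
   the code above materializes the predicate into the QImode low part of
   a fresh SImode pseudo.  */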

/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  machine_mode tmode = insn_data[d->icode].operand[0].mode;
  machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  rtx op2;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[d->icode].operand[0].mode;
  machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
33157 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
33161 tree arg0
= CALL_EXPR_ARG (exp
, 0);
33162 tree arg1
= CALL_EXPR_ARG (exp
, 1);
33163 rtx op0
= expand_normal (arg0
);
33164 rtx op1
= expand_normal (arg1
);
33165 machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
33166 machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
33167 enum rtx_code comparison
= d
->comparison
;
33169 if (VECTOR_MODE_P (mode0
))
33170 op0
= safe_vector_operand (op0
, mode0
);
33171 if (VECTOR_MODE_P (mode1
))
33172 op1
= safe_vector_operand (op1
, mode1
);
33174 target
= gen_reg_rtx (SImode
);
33175 emit_move_insn (target
, const0_rtx
);
33176 target
= gen_rtx_SUBREG (QImode
, target
, 0);
33178 if ((optimize
&& !register_operand (op0
, mode0
))
33179 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
33180 op0
= copy_to_mode_reg (mode0
, op0
);
33181 if ((optimize
&& !register_operand (op1
, mode1
))
33182 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
33183 op1
= copy_to_mode_reg (mode1
, op1
);
33185 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
33189 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
33190 gen_rtx_fmt_ee (comparison
, QImode
,
33194 return SUBREG_REG (target
);

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
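
/* Illustrative mapping: the SSE4.2 intrinsics _mm_cmpestri and
   _mm_cmpestrm reach this expander as IX86_BUILTIN_PCMPESTRI128 and
   IX86_BUILTIN_PCMPESTRM128; the flag-reading variants (_mm_cmpestra
   and friends) take the d->flag path that reads the comparison result
   out of FLAGS_REG.  */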

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  unsigned int mask_pos = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      machine_mode mode;
    } args[6];
  bool second_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  machine_mode tmode = insn_p->operand[0].mode;
  machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V8DF_FTYPE_V8DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V16SF_FTYPE_V16SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
    case V16SI_FTYPE_V16SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);

    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
    case V16SI_FTYPE_V8DF_V8DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);

    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);

    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT_FTYPE_UINT:
    case UINT16_FTYPE_UINT16:
    case UINT64_FTYPE_INT:
    case UINT64_FTYPE_UINT64:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
    case UQI_FTYPE_UQI:
    case UHI_FTYPE_UHI:
    case USI_FTYPE_USI:
    case USI_FTYPE_UQI:
    case USI_FTYPE_UHI:
    case UDI_FTYPE_UDI:
    case UHI_FTYPE_V16QI:
    case USI_FTYPE_V32QI:
    case UDI_FTYPE_V64QI:
    case V16QI_FTYPE_UHI:
    case V32QI_FTYPE_USI:
    case V64QI_FTYPE_UDI:
    case V8HI_FTYPE_UQI:
    case V16HI_FTYPE_UHI:
    case V32HI_FTYPE_USI:
    case V4SI_FTYPE_UQI:
    case V8SI_FTYPE_UQI:
    case V4SI_FTYPE_UHI:
    case V8SI_FTYPE_UHI:
    case UQI_FTYPE_V8HI:
    case UHI_FTYPE_V16HI:
    case USI_FTYPE_V32HI:
    case UQI_FTYPE_V4SI:
    case UQI_FTYPE_V8SI:
    case UHI_FTYPE_V16SI:
    case UQI_FTYPE_V2DI:
    case UQI_FTYPE_V4DI:
    case UQI_FTYPE_V8DI:
    case V16SI_FTYPE_UHI:
    case V2DI_FTYPE_UQI:
    case V4DI_FTYPE_UQI:
    case V16SI_FTYPE_INT:
    case V16SF_FTYPE_V8SF:
    case V16SI_FTYPE_V8SI:
    case V16SF_FTYPE_V4SF:
    case V16SI_FTYPE_V4SI:
    case V16SI_FTYPE_V16SF:
    case V16SI_FTYPE_V16SI:
    case V16SF_FTYPE_V16SF:
    case V8DI_FTYPE_UQI:
    case V8DI_FTYPE_V8DI:
    case V8DF_FTYPE_V4DF:
    case V8DF_FTYPE_V2DF:
    case V8DF_FTYPE_V8DF:
      nargs = 1;
      break;

    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);

    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V16SF_FTYPE_V16SF_V16SF:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V8DF_FTYPE_V8DF_V8DF:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_UINT:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2DF_FTYPE_V2DF_UINT:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V64QI_FTYPE_V64QI_V64QI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
    case V8DI_FTYPE_V64QI_V64QI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;

    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;

    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      second_arg_count = true;
      break;

    case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
    case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
    case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
    case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
    case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
    case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
    case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
    case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
    case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
    case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
    case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
    case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
    case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
      nargs = 4;
      second_arg_count = true;
      break;

    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
    case UQI_FTYPE_UQI_UQI:
    case UHI_FTYPE_UHI_UHI:
    case USI_FTYPE_USI_USI:
    case UDI_FTYPE_UDI_UDI:
    case V16SI_FTYPE_V8DF_V8DF:
      nargs = 2;
      break;

    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;

    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;

    case V8DI_FTYPE_V8DI_INT_CONVERT:
      nargs = 2;
      rmode = V4TImode;
      nargs_constant = 1;
      break;
33701 case V8HI_FTYPE_V8HI_INT
:
33702 case V8HI_FTYPE_V8SF_INT
:
33703 case V16HI_FTYPE_V16SF_INT
:
33704 case V8HI_FTYPE_V4SF_INT
:
33705 case V8SF_FTYPE_V8SF_INT
:
33706 case V4SF_FTYPE_V16SF_INT
:
33707 case V16SF_FTYPE_V16SF_INT
:
33708 case V4SI_FTYPE_V4SI_INT
:
33709 case V4SI_FTYPE_V8SI_INT
:
33710 case V4HI_FTYPE_V4HI_INT
:
33711 case V4DF_FTYPE_V4DF_INT
:
33712 case V4DF_FTYPE_V8DF_INT
:
33713 case V4SF_FTYPE_V4SF_INT
:
33714 case V4SF_FTYPE_V8SF_INT
:
33715 case V2DI_FTYPE_V2DI_INT
:
33716 case V2DF_FTYPE_V2DF_INT
:
33717 case V2DF_FTYPE_V4DF_INT
:
33718 case V16HI_FTYPE_V16HI_INT
:
33719 case V8SI_FTYPE_V8SI_INT
:
33720 case V16SI_FTYPE_V16SI_INT
:
33721 case V4SI_FTYPE_V16SI_INT
:
33722 case V4DI_FTYPE_V4DI_INT
:
33723 case V2DI_FTYPE_V4DI_INT
:
33724 case V4DI_FTYPE_V8DI_INT
:
33725 case QI_FTYPE_V4SF_INT
:
33726 case QI_FTYPE_V2DF_INT
:
33727 case UQI_FTYPE_UQI_UQI_CONST
:
33728 case UHI_FTYPE_UHI_UQI
:
33729 case USI_FTYPE_USI_UQI
:
33730 case UDI_FTYPE_UDI_UQI
:
33732 nargs_constant
= 1;
33734 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
33735 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
33736 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
33737 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
33738 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
33739 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
33740 case UHI_FTYPE_V16SI_V16SI_UHI
:
33741 case UQI_FTYPE_V8DI_V8DI_UQI
:
33742 case V16HI_FTYPE_V16SI_V16HI_UHI
:
33743 case V16QI_FTYPE_V16SI_V16QI_UHI
:
33744 case V16QI_FTYPE_V8DI_V16QI_UQI
:
33745 case V16SF_FTYPE_V16SF_V16SF_UHI
:
33746 case V16SF_FTYPE_V4SF_V16SF_UHI
:
33747 case V16SI_FTYPE_SI_V16SI_UHI
:
33748 case V16SI_FTYPE_V16HI_V16SI_UHI
:
33749 case V16SI_FTYPE_V16QI_V16SI_UHI
:
33750 case V8SF_FTYPE_V4SF_V8SF_UQI
:
33751 case V4DF_FTYPE_V2DF_V4DF_UQI
:
33752 case V8SI_FTYPE_V4SI_V8SI_UQI
:
33753 case V8SI_FTYPE_SI_V8SI_UQI
:
33754 case V4SI_FTYPE_V4SI_V4SI_UQI
:
33755 case V4SI_FTYPE_SI_V4SI_UQI
:
33756 case V4DI_FTYPE_V2DI_V4DI_UQI
:
33757 case V4DI_FTYPE_DI_V4DI_UQI
:
33758 case V2DI_FTYPE_V2DI_V2DI_UQI
:
33759 case V2DI_FTYPE_DI_V2DI_UQI
:
33760 case V64QI_FTYPE_V64QI_V64QI_UDI
:
33761 case V64QI_FTYPE_V16QI_V64QI_UDI
:
33762 case V64QI_FTYPE_QI_V64QI_UDI
:
33763 case V32QI_FTYPE_V32QI_V32QI_USI
:
33764 case V32QI_FTYPE_V16QI_V32QI_USI
:
33765 case V32QI_FTYPE_QI_V32QI_USI
:
33766 case V16QI_FTYPE_V16QI_V16QI_UHI
:
33767 case V16QI_FTYPE_QI_V16QI_UHI
:
33768 case V32HI_FTYPE_V8HI_V32HI_USI
:
33769 case V32HI_FTYPE_HI_V32HI_USI
:
33770 case V16HI_FTYPE_V8HI_V16HI_UHI
:
33771 case V16HI_FTYPE_HI_V16HI_UHI
:
33772 case V8HI_FTYPE_V8HI_V8HI_UQI
:
33773 case V8HI_FTYPE_HI_V8HI_UQI
:
33774 case V8SF_FTYPE_V8HI_V8SF_UQI
:
33775 case V4SF_FTYPE_V8HI_V4SF_UQI
:
33776 case V8SI_FTYPE_V8SF_V8SI_UQI
:
33777 case V4SI_FTYPE_V4SF_V4SI_UQI
:
33778 case V4DI_FTYPE_V4SF_V4DI_UQI
:
33779 case V2DI_FTYPE_V4SF_V2DI_UQI
:
33780 case V4SF_FTYPE_V4DI_V4SF_UQI
:
33781 case V4SF_FTYPE_V2DI_V4SF_UQI
:
33782 case V4DF_FTYPE_V4DI_V4DF_UQI
:
33783 case V2DF_FTYPE_V2DI_V2DF_UQI
:
33784 case V16QI_FTYPE_V8HI_V16QI_UQI
:
33785 case V16QI_FTYPE_V16HI_V16QI_UHI
:
33786 case V16QI_FTYPE_V4SI_V16QI_UQI
:
33787 case V16QI_FTYPE_V8SI_V16QI_UQI
:
33788 case V8HI_FTYPE_V4SI_V8HI_UQI
:
33789 case V8HI_FTYPE_V8SI_V8HI_UQI
:
33790 case V16QI_FTYPE_V2DI_V16QI_UQI
:
33791 case V16QI_FTYPE_V4DI_V16QI_UQI
:
33792 case V8HI_FTYPE_V2DI_V8HI_UQI
:
33793 case V8HI_FTYPE_V4DI_V8HI_UQI
:
33794 case V4SI_FTYPE_V2DI_V4SI_UQI
:
33795 case V4SI_FTYPE_V4DI_V4SI_UQI
:
33796 case V32QI_FTYPE_V32HI_V32QI_USI
:
33797 case UHI_FTYPE_V16QI_V16QI_UHI
:
33798 case USI_FTYPE_V32QI_V32QI_USI
:
33799 case UDI_FTYPE_V64QI_V64QI_UDI
:
33800 case UQI_FTYPE_V8HI_V8HI_UQI
:
33801 case UHI_FTYPE_V16HI_V16HI_UHI
:
33802 case USI_FTYPE_V32HI_V32HI_USI
:
33803 case UQI_FTYPE_V4SI_V4SI_UQI
:
33804 case UQI_FTYPE_V8SI_V8SI_UQI
:
33805 case UQI_FTYPE_V2DI_V2DI_UQI
:
33806 case UQI_FTYPE_V4DI_V4DI_UQI
:
33807 case V4SF_FTYPE_V2DF_V4SF_UQI
:
33808 case V4SF_FTYPE_V4DF_V4SF_UQI
:
33809 case V16SI_FTYPE_V16SI_V16SI_UHI
:
33810 case V16SI_FTYPE_V4SI_V16SI_UHI
:
33811 case V2DI_FTYPE_V4SI_V2DI_UQI
:
33812 case V2DI_FTYPE_V8HI_V2DI_UQI
:
33813 case V2DI_FTYPE_V16QI_V2DI_UQI
:
33814 case V4DI_FTYPE_V4DI_V4DI_UQI
:
33815 case V4DI_FTYPE_V4SI_V4DI_UQI
:
33816 case V4DI_FTYPE_V8HI_V4DI_UQI
:
33817 case V4DI_FTYPE_V16QI_V4DI_UQI
:
33818 case V4DI_FTYPE_V4DF_V4DI_UQI
:
33819 case V2DI_FTYPE_V2DF_V2DI_UQI
:
33820 case V4SI_FTYPE_V4DF_V4SI_UQI
:
33821 case V4SI_FTYPE_V2DF_V4SI_UQI
:
33822 case V4SI_FTYPE_V8HI_V4SI_UQI
:
33823 case V4SI_FTYPE_V16QI_V4SI_UQI
:
33824 case V4DI_FTYPE_V4DI_V4DI_V4DI
:
33825 case V8DF_FTYPE_V2DF_V8DF_UQI
:
33826 case V8DF_FTYPE_V4DF_V8DF_UQI
:
33827 case V8DF_FTYPE_V8DF_V8DF_UQI
:
33828 case V8SF_FTYPE_V8SF_V8SF_UQI
:
33829 case V8SF_FTYPE_V8SI_V8SF_UQI
:
33830 case V4DF_FTYPE_V4DF_V4DF_UQI
:
33831 case V4SF_FTYPE_V4SF_V4SF_UQI
:
33832 case V2DF_FTYPE_V2DF_V2DF_UQI
:
33833 case V2DF_FTYPE_V4SF_V2DF_UQI
:
33834 case V2DF_FTYPE_V4SI_V2DF_UQI
:
33835 case V4SF_FTYPE_V4SI_V4SF_UQI
:
33836 case V4DF_FTYPE_V4SF_V4DF_UQI
:
33837 case V4DF_FTYPE_V4SI_V4DF_UQI
:
33838 case V8SI_FTYPE_V8SI_V8SI_UQI
:
33839 case V8SI_FTYPE_V8HI_V8SI_UQI
:
33840 case V8SI_FTYPE_V16QI_V8SI_UQI
:
33841 case V8DF_FTYPE_V8SI_V8DF_UQI
:
33842 case V8DI_FTYPE_DI_V8DI_UQI
:
33843 case V16SF_FTYPE_V8SF_V16SF_UHI
:
33844 case V16SI_FTYPE_V8SI_V16SI_UHI
:
33845 case V16HI_FTYPE_V16HI_V16HI_UHI
:
33846 case V8HI_FTYPE_V16QI_V8HI_UQI
:
33847 case V16HI_FTYPE_V16QI_V16HI_UHI
:
33848 case V32HI_FTYPE_V32HI_V32HI_USI
:
33849 case V32HI_FTYPE_V32QI_V32HI_USI
:
33850 case V8DI_FTYPE_V16QI_V8DI_UQI
:
33851 case V8DI_FTYPE_V2DI_V8DI_UQI
:
33852 case V8DI_FTYPE_V4DI_V8DI_UQI
:
33853 case V8DI_FTYPE_V8DI_V8DI_UQI
:
33854 case V8DI_FTYPE_V8HI_V8DI_UQI
:
33855 case V8DI_FTYPE_V8SI_V8DI_UQI
:
33856 case V8HI_FTYPE_V8DI_V8HI_UQI
:
33857 case V8SI_FTYPE_V8DI_V8SI_UQI
:
33858 case V4SI_FTYPE_V4SI_V4SI_V4SI
:
33859 case V16SI_FTYPE_V16SI_V16SI_V16SI
:
33860 case V8DI_FTYPE_V8DI_V8DI_V8DI
:
33861 case V32HI_FTYPE_V32HI_V32HI_V32HI
:
33862 case V2DI_FTYPE_V2DI_V2DI_V2DI
:
33863 case V16HI_FTYPE_V16HI_V16HI_V16HI
:
33864 case V8SI_FTYPE_V8SI_V8SI_V8SI
:
33865 case V8HI_FTYPE_V8HI_V8HI_V8HI
:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V16SF_FTYPE_V16SF_V16SF_INT:
    case V16SF_FTYPE_V16SF_V4SF_INT:
    case V16SI_FTYPE_V16SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
    case UQI_FTYPE_V8DI_V8UDI_INT:
    case UQI_FTYPE_V8DF_V8DF_INT:
    case UQI_FTYPE_V2DF_V2DF_INT:
    case UQI_FTYPE_V4SF_V4SF_INT:
    case UHI_FTYPE_V16SI_V16SI_INT:
    case UHI_FTYPE_V16SF_V16SF_INT:
    case V64QI_FTYPE_V64QI_V64QI_INT:
    case V32HI_FTYPE_V32HI_V32HI_INT:
    case V16SI_FTYPE_V16SI_V16SI_INT:
    case V8DI_FTYPE_V8DI_V8DI_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V4DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V2DImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = V1DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
      nargs = 3;
      rmode = V8DImode;
      nargs_constant = 1;
      break;
    case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
      nargs = 5;
      rmode = V8DImode;
      mask_pos = 2;
      nargs_constant = 1;
      break;
    case QI_FTYPE_V8DF_INT_UQI:
    case QI_FTYPE_V4DF_INT_UQI:
    case QI_FTYPE_V2DF_INT_UQI:
    case HI_FTYPE_V16SF_INT_UHI:
    case QI_FTYPE_V8SF_INT_UQI:
    case QI_FTYPE_V4SF_INT_UQI:
      nargs = 3;
      mask_pos = 1;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
      nargs = 5;
      rmode = V4DImode;
      mask_pos = 2;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
      nargs = 5;
      rmode = V2DImode;
      mask_pos = 2;
      nargs_constant = 1;
      break;
    case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
    case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
    case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
    case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
    case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
    case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
    case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
    case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
    case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
    case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
    case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
    case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
    case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
    case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
    case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
    case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
    case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
    case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
    case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
    case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
    case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
    case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
    case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
    case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
    case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
    case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
    case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
    case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
    case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
    case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
    case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
    case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
    case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
    case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
    case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
    case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
    case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
    case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
    case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
    case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
    case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
    case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
    case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
    case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
    case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
    case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
    case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
    case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
      nargs = 4;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case UQI_FTYPE_V4DI_V4DI_INT_UQI:
    case UQI_FTYPE_V8SI_V8SI_INT_UQI:
    case QI_FTYPE_V4DF_V4DF_INT_UQI:
    case QI_FTYPE_V8SF_V8SF_INT_UQI:
    case UQI_FTYPE_V2DI_V2DI_INT_UQI:
    case UQI_FTYPE_V4SI_V4SI_INT_UQI:
    case UQI_FTYPE_V2DF_V2DF_INT_UQI:
    case UQI_FTYPE_V4SF_V4SF_INT_UQI:
    case UDI_FTYPE_V64QI_V64QI_INT_UDI:
    case USI_FTYPE_V32QI_V32QI_INT_USI:
    case UHI_FTYPE_V16QI_V16QI_INT_UHI:
    case USI_FTYPE_V32HI_V32HI_INT_USI:
    case UHI_FTYPE_V16HI_V16HI_INT_UHI:
    case UQI_FTYPE_V8HI_V8HI_INT_UQI:
    case V32HI_FTYPE_V32HI_V32HI_V32HI_INT:
    case V16HI_FTYPE_V16HI_V16HI_V16HI_INT:
    case V8HI_FTYPE_V8HI_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_V8SI_INT:
    case V4DI_FTYPE_V4DI_V4DI_V4DI_INT:
    case V8DI_FTYPE_V8DI_V8DI_V8DI_INT:
    case V16SI_FTYPE_V16SI_V16SI_V16SI_INT:
    case V2DI_FTYPE_V2DI_V2DI_V2DI_INT:
    case V4SI_FTYPE_V4SI_V4SI_V4SI_INT:
      nargs = 4;
      mask_pos = 1;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
    case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
      nargs = 4;
      break;
    case UQI_FTYPE_V8DI_V8DI_INT_UQI:
    case UHI_FTYPE_V16SI_V16SI_INT_UHI:
      mask_pos = 1;
      nargs = 4;
      nargs_constant = 1;
      break;
    case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
    case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
    case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
    case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
    case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
    case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
    case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
    case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
    case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
    case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
    case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
    case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
    case V32HI_FTYPE_V32HI_INT_V32HI_USI:
    case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
    case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
    case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
    case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
    case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
    case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
    case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
    case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
    case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
    case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
    case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
    case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
    case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
    case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
    case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
    case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
    case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
      nargs = 4;
      mask_pos = 2;
      nargs_constant = 1;
      break;
    case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
    case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
    case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
    case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
    case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
    case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
    case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
    case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
    case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
    case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
    case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
    case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
    case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
    case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
    case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
    case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
    case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
    case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
    case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
    case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
    case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
    case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
    case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
    case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
    case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
      nargs = 5;
      mask_pos = 2;
      nargs_constant = 1;
      break;
    case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
    case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
    case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
    case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
    case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
    case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
      nargs = 5;
      mask_pos = 1;
      nargs_constant = 1;
      break;
    case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
    case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
    case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
    case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT:
    case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT:
    case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
      nargs = 5;
      mask_pos = 1;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      else if (memory_operand (target, tmode))
	num_memory++;
      real_target = target;
    }
  else
    {
      real_target = gen_reg_rtx (tmode);
      target = lowpart_subreg (rmode, real_target, tmode);
    }
  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (second_arg_count && i == 1)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in a register.
	     The instructions use a 64-bit count; if op is just
	     32-bit, zero-extend it, as negative shift counts
	     are undefined behavior and zero-extension is more
	     efficient.  */
	  if (!match)
	    {
	      if (SCALAR_INT_MODE_P (GET_MODE (op)))
		op = convert_modes (mode, GET_MODE (op), op, 1);
	      else
		op = lowpart_subreg (mode, op, GET_MODE (op));
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
	       (!mask_pos && (nargs - i) <= nargs_constant))
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx_vinsertf128v4di:
	      case CODE_FOR_avx_vextractf128v4di:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx512f_cmpv8di3_mask:
	      case CODE_FOR_avx512f_cmpv16si3_mask:
	      case CODE_FOR_avx512f_ucmpv8di3_mask:
	      case CODE_FOR_avx512f_ucmpv16si3_mask:
	      case CODE_FOR_avx512vl_cmpv4di3_mask:
	      case CODE_FOR_avx512vl_cmpv8si3_mask:
	      case CODE_FOR_avx512vl_ucmpv4di3_mask:
	      case CODE_FOR_avx512vl_ucmpv8si3_mask:
	      case CODE_FOR_avx512vl_cmpv2di3_mask:
	      case CODE_FOR_avx512vl_cmpv4si3_mask:
	      case CODE_FOR_avx512vl_ucmpv2di3_mask:
	      case CODE_FOR_avx512vl_ucmpv4si3_mask:
		error ("the last argument must be a 3-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:

	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:

	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
	      case CODE_FOR_avx_vpermilv4df_mask:
	      case CODE_FOR_avx512f_getmantv8df_mask:
	      case CODE_FOR_avx512f_getmantv16sf_mask:
	      case CODE_FOR_avx512vl_getmantv8sf_mask:
	      case CODE_FOR_avx512vl_getmantv4df_mask:
	      case CODE_FOR_avx512vl_getmantv4sf_mask:
	      case CODE_FOR_avx512vl_getmantv2df_mask:
	      case CODE_FOR_avx512dq_rangepv8df_mask_round:
	      case CODE_FOR_avx512dq_rangepv16sf_mask_round:
	      case CODE_FOR_avx512dq_rangepv4df_mask:
	      case CODE_FOR_avx512dq_rangepv8sf_mask:
	      case CODE_FOR_avx512dq_rangepv2df_mask:
	      case CODE_FOR_avx512dq_rangepv4sf_mask:
	      case CODE_FOR_avx_shufpd256_mask:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sha1rnds4:
	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_avx_vpermilv2df_mask:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
	      case CODE_FOR_avx512f_vinsertf32x4_mask:
	      case CODE_FOR_avx512f_vinserti32x4_mask:
	      case CODE_FOR_avx512f_vextractf32x4_mask:
	      case CODE_FOR_avx512f_vextracti32x4_mask:
	      case CODE_FOR_sse2_shufpd:
	      case CODE_FOR_sse2_shufpd_mask:
	      case CODE_FOR_avx512dq_shuf_f64x2_mask:
	      case CODE_FOR_avx512dq_shuf_i64x2_mask:
	      case CODE_FOR_avx512vl_shuf_i32x4_mask:
	      case CODE_FOR_avx512vl_shuf_f32x4_mask:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
	      case CODE_FOR_avx512f_vinsertf64x4_mask:
	      case CODE_FOR_avx512f_vinserti64x4_mask:
	      case CODE_FOR_avx512f_vextractf64x4_mask:
	      case CODE_FOR_avx512f_vextracti64x4_mask:
	      case CODE_FOR_avx512dq_vinsertf32x8_mask:
	      case CODE_FOR_avx512dq_vinserti32x8_mask:
	      case CODE_FOR_avx512vl_vinsertv4df:
	      case CODE_FOR_avx512vl_vinsertv4di:
	      case CODE_FOR_avx512vl_vinsertv8sf:
	      case CODE_FOR_avx512vl_vinsertv8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
	      case CODE_FOR_avx512f_cmpv8df3_mask:
	      case CODE_FOR_avx512f_cmpv16sf3_mask:
	      case CODE_FOR_avx512f_vmcmpv2df3_mask:
	      case CODE_FOR_avx512f_vmcmpv4sf3_mask:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
			(!mask_pos && (nargs - i) == nargs_constant))
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		    /* FALLTHRU */
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  op = fixup_modeless_constant (op, mode);

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = lowpart_subreg (mode, op, GET_MODE (op));
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    case 5:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op, args[4].op);
      break;
    case 6:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op, args[4].op,
			     args[5].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Transform pattern of following layout:
     (set A
       (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
   into:
     (set (A B))  */

static rtx
ix86_erase_embedded_rounding (rtx pat)
{
  if (GET_CODE (pat) == INSN)
    pat = PATTERN (pat);

  gcc_assert (GET_CODE (pat) == SET);
  rtx src = SET_SRC (pat);
  gcc_assert (XVECLEN (src, 0) == 2);
  rtx p0 = XVECEXP (src, 0, 0);
  gcc_assert (GET_CODE (src) == UNSPEC
	      && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
  rtx res = gen_rtx_SET (SET_DEST (pat), p0);
  return res;
}
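/* For instance (illustrative RTL, not taken from a real dump): with
   NO_ROUND selected, a pattern of the shape
     (set (reg:V2DF 100)
	  (unspec:V2DF [(plus:V2DF (reg:V2DF 101) (reg:V2DF 102))
			(const_int 4)] UNSPEC_EMBEDDED_ROUNDING))
   is rewritten by ix86_erase_embedded_rounding above to
     (set (reg:V2DF 100) (plus:V2DF (reg:V2DF 101) (reg:V2DF 102))).  */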
/* Subroutine of ix86_expand_round_builtin to take care of comi insns
   with rounding.  */

static rtx
ix86_expand_sse_comi_round (const struct builtin_description *d,
			    tree exp, rtx target)
{
  rtx pat, set_dst;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  machine_mode mode0 = insn_p->operand[0].mode;
  machine_mode mode1 = insn_p->operand[1].mode;
  enum rtx_code comparison = UNEQ;
  bool need_ucomi = false;

  /* See avxintrin.h for values.  */
  enum rtx_code comi_comparisons[32] =
    {
      UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
      UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
      UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
    };
  bool need_ucomi_values[32] =
    {
      true, false, false, true, true, false, false, true,
      true, false, false, true, true, false, false, true,
      false, true, true, false, false, true, true, false,
      false, true, true, false, false, true, true, false
    };
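  /* E.g. _CMP_EQ_OQ (0) maps to UNEQ with a quiet (ucomi) compare,
     while _CMP_GT_OS (14) maps to GT with a signaling (comi) compare;
     both tables above are indexed by the _CMP_* predicate value
     (illustrative, see the predicate table in avxintrin.h).  */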
  if (!CONST_INT_P (op2))
    {
      error ("the third argument must be a comparison constant");
      return const0_rtx;
    }
  if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
    {
      error ("incorrect comparison mode");
      return const0_rtx;
    }

  if (!insn_p->operand[2].predicate (op3, SImode))
    {
      error ("incorrect rounding operand");
      return const0_rtx;
    }

  comparison = comi_comparisons[INTVAL (op2)];
  need_ucomi = need_ucomi_values[INTVAL (op2)];

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_p->operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_p->operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  if (need_ucomi)
    icode = icode == CODE_FOR_sse_comi_round
	    ? CODE_FOR_sse_ucomi_round
	    : CODE_FOR_sse2_ucomi_round;
  pat = GEN_FCN (icode) (op0, op1, op3);
  if (! pat)
    return 0;

  /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
  if (INTVAL (op3) == NO_ROUND)
    {
      pat = ix86_erase_embedded_rounding (pat);
      if (! pat)
	return 0;

      set_dst = SET_DEST (pat);
    }
  else
    {
      gcc_assert (GET_CODE (pat) == SET);
      set_dst = SET_DEST (pat);
    }

  emit_insn (pat);
  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  set_dst,
					  const0_rtx)));

  return SUBREG_REG (target);
}
static rtx
ix86_expand_round_builtin (const struct builtin_description *d,
			   tree exp, rtx target)
{
  rtx pat;
  unsigned int i, nargs;
  struct
    {
      rtx op;
      machine_mode mode;
    } args[6];
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  machine_mode tmode = insn_p->operand[0].mode;
  unsigned int nargs_constant = 0;
  unsigned int redundant_embed_rnd = 0;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case UINT64_FTYPE_V2DF_INT:
    case UINT64_FTYPE_V4SF_INT:
    case UINT_FTYPE_V2DF_INT:
    case UINT_FTYPE_V4SF_INT:
    case INT64_FTYPE_V2DF_INT:
    case INT64_FTYPE_V4SF_INT:
    case INT_FTYPE_V2DF_INT:
    case INT_FTYPE_V4SF_INT:
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_UINT_INT:
    case V4SF_FTYPE_V4SF_UINT64_INT:
    case V2DF_FTYPE_V2DF_UINT64_INT:
    case V4SF_FTYPE_V4SF_INT_INT:
    case V4SF_FTYPE_V4SF_INT64_INT:
    case V2DF_FTYPE_V2DF_INT64_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V2DF_INT:
    case V2DF_FTYPE_V2DF_V4SF_INT:
      nargs = 3;
      break;
    case V8SF_FTYPE_V8DF_V8SF_QI_INT:
    case V8DF_FTYPE_V8DF_V8DF_QI_INT:
    case V8SI_FTYPE_V8DF_V8SI_QI_INT:
    case V8DI_FTYPE_V8DF_V8DI_QI_INT:
    case V8SF_FTYPE_V8DI_V8SF_QI_INT:
    case V8DF_FTYPE_V8DI_V8DF_QI_INT:
    case V16SF_FTYPE_V16SF_V16SF_HI_INT:
    case V8DI_FTYPE_V8SF_V8DI_QI_INT:
    case V16SF_FTYPE_V16SI_V16SF_HI_INT:
    case V16SI_FTYPE_V16SF_V16SI_HI_INT:
    case V8DF_FTYPE_V8SF_V8DF_QI_INT:
    case V16SF_FTYPE_V16HI_V16SF_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
      nargs = 4;
      break;
    case V4SF_FTYPE_V4SF_V4SF_INT_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_INT:
      nargs_constant = 2;
      nargs = 4;
      break;
    case INT_FTYPE_V4SF_V4SF_INT_INT:
    case INT_FTYPE_V2DF_V2DF_INT_INT:
      return ix86_expand_sse_comi_round (d, exp, target);
    case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
    case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
    case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
      nargs = 5;
      break;
    case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
    case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
      nargs_constant = 4;
      nargs = 5;
      break;
    case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
    case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
    case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
    case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
      nargs_constant = 3;
      nargs = 5;
      break;
    case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
    case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
      nargs = 6;
      nargs_constant = 4;
      break;
    case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
      nargs = 6;
      nargs_constant = 3;
      break;
    default:
      gcc_unreachable ();
    }
  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (optimize
      || target == 0
      || GET_MODE (target) != tmode
      || !insn_p->operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);
  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (i == nargs - nargs_constant)
	{
	  if (!match)
	    {
	      switch (icode)
		{
		case CODE_FOR_avx512f_getmantv8df_mask_round:
		case CODE_FOR_avx512f_getmantv16sf_mask_round:
		case CODE_FOR_avx512f_vgetmantv2df_round:
		case CODE_FOR_avx512f_vgetmantv2df_mask_round:
		case CODE_FOR_avx512f_vgetmantv4sf_round:
		case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
		  error ("the immediate argument must be a 4-bit immediate");
		  return const0_rtx;
		case CODE_FOR_avx512f_cmpv8df3_mask_round:
		case CODE_FOR_avx512f_cmpv16sf3_mask_round:
		case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
		case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
		  error ("the immediate argument must be a 5-bit immediate");
		  return const0_rtx;
		default:
		  error ("the immediate argument must be an 8-bit immediate");
		  return const0_rtx;
		}
	    }
	}
      else if (i == nargs-1)
	{
	  if (!insn_p->operand[nargs].predicate (op, SImode))
	    {
	      error ("incorrect rounding operand");
	      return const0_rtx;
	    }

	  /* If there is no rounding use normal version of the pattern.  */
	  if (INTVAL (op) == NO_ROUND)
	    redundant_embed_rnd = 1;
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  op = fixup_modeless_constant (op, mode);

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = lowpart_subreg (mode, op, GET_MODE (op));
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    case 5:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op, args[4].op);
      break;
    case 6:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op, args[4].op,
			     args[5].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  if (redundant_embed_rnd)
    pat = ix86_erase_embedded_rounding (pat);

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  bool aligned_mem = false;
  struct
    {
      rtx op;
      machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;

    case INT_FTYPE_VOID:
    case USHORT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
    case V8DI_FTYPE_PV8DI:
      nargs = 1;
      klass = load;
      memory = 0;
      switch (icode)
	{
	case CODE_FOR_sse4_1_movntdqa:
	case CODE_FOR_avx2_movntdqa:
	case CODE_FOR_avx512f_movntdqa:
	  aligned_mem = true;
	  break;
	default:
	  break;
	}
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV8DI_V8DI:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V16SF:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V8DF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      switch (icode)
	{
	/* These builtins and instructions require the memory
	   to be properly aligned.  */
	case CODE_FOR_avx_movntv4di:
	case CODE_FOR_sse2_movntv2di:
	case CODE_FOR_avx_movntv8sf:
	case CODE_FOR_sse_movntv4sf:
	case CODE_FOR_sse4a_vmmovntv4sf:
	case CODE_FOR_avx_movntv4df:
	case CODE_FOR_sse2_movntv2df:
	case CODE_FOR_sse4a_vmmovntv2df:
	case CODE_FOR_sse2_movntidi:
	case CODE_FOR_sse_movntq:
	case CODE_FOR_sse2_movntisi:
	case CODE_FOR_avx512f_movntv16sf:
	case CODE_FOR_avx512f_movntv8df:
	case CODE_FOR_avx512f_movntv8di:
	  aligned_mem = true;
	  break;
	default:
	  break;
	}
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
    case VOID_FTYPE_INT_INT64:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8DF_V8DF_UQI:
    case VOID_FTYPE_PV4DF_V4DF_UQI:
    case VOID_FTYPE_PV2DF_V2DF_UQI:
    case VOID_FTYPE_PV16SF_V16SF_UHI:
    case VOID_FTYPE_PV8SF_V8SF_UQI:
    case VOID_FTYPE_PV4SF_V4SF_UQI:
    case VOID_FTYPE_PV8DI_V8DI_UQI:
    case VOID_FTYPE_PV4DI_V4DI_UQI:
    case VOID_FTYPE_PV2DI_V2DI_UQI:
    case VOID_FTYPE_PV16SI_V16SI_UHI:
    case VOID_FTYPE_PV8SI_V8SI_UQI:
    case VOID_FTYPE_PV4SI_V4SI_UQI:
    case VOID_FTYPE_PV64QI_V64QI_UDI:
    case VOID_FTYPE_PV32HI_V32HI_USI:
    case VOID_FTYPE_PV32QI_V32QI_USI:
    case VOID_FTYPE_PV16QI_V16QI_UHI:
    case VOID_FTYPE_PV16HI_V16HI_UHI:
    case VOID_FTYPE_PV8HI_V8HI_UQI:
      switch (icode)
	{
	/* These builtins and instructions require the memory
	   to be properly aligned.  */
	case CODE_FOR_avx512f_storev16sf_mask:
	case CODE_FOR_avx512f_storev16si_mask:
	case CODE_FOR_avx512f_storev8df_mask:
	case CODE_FOR_avx512f_storev8di_mask:
	case CODE_FOR_avx512vl_storev8sf_mask:
	case CODE_FOR_avx512vl_storev8si_mask:
	case CODE_FOR_avx512vl_storev4df_mask:
	case CODE_FOR_avx512vl_storev4di_mask:
	case CODE_FOR_avx512vl_storev4sf_mask:
	case CODE_FOR_avx512vl_storev4si_mask:
	case CODE_FOR_avx512vl_storev2df_mask:
	case CODE_FOR_avx512vl_storev2di_mask:
	  aligned_mem = true;
	  break;
	default:
	  break;
	}
      /* FALLTHRU */
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
    case VOID_FTYPE_PV8SI_V8DI_UQI:
    case VOID_FTYPE_PV8HI_V8DI_UQI:
    case VOID_FTYPE_PV16HI_V16SI_UHI:
    case VOID_FTYPE_PV16QI_V8DI_UQI:
    case VOID_FTYPE_PV16QI_V16SI_UHI:
    case VOID_FTYPE_PV4SI_V4DI_UQI:
    case VOID_FTYPE_PV4SI_V2DI_UQI:
    case VOID_FTYPE_PV8HI_V4DI_UQI:
    case VOID_FTYPE_PV8HI_V2DI_UQI:
    case VOID_FTYPE_PV8HI_V8SI_UQI:
    case VOID_FTYPE_PV8HI_V4SI_UQI:
    case VOID_FTYPE_PV16QI_V4DI_UQI:
    case VOID_FTYPE_PV16QI_V2DI_UQI:
    case VOID_FTYPE_PV16QI_V8SI_UQI:
    case VOID_FTYPE_PV16QI_V4SI_UQI:
    case VOID_FTYPE_PCHAR_V64QI_UDI:
    case VOID_FTYPE_PCHAR_V32QI_USI:
    case VOID_FTYPE_PCHAR_V16QI_UHI:
    case VOID_FTYPE_PSHORT_V32HI_USI:
    case VOID_FTYPE_PSHORT_V16HI_UHI:
    case VOID_FTYPE_PSHORT_V8HI_UQI:
    case VOID_FTYPE_PINT_V16SI_UHI:
    case VOID_FTYPE_PINT_V8SI_UQI:
    case VOID_FTYPE_PINT_V4SI_UQI:
    case VOID_FTYPE_PINT64_V8DI_UQI:
    case VOID_FTYPE_PINT64_V4DI_UQI:
    case VOID_FTYPE_PINT64_V2DI_UQI:
    case VOID_FTYPE_PDOUBLE_V8DF_UQI:
    case VOID_FTYPE_PDOUBLE_V4DF_UQI:
    case VOID_FTYPE_PDOUBLE_V2DF_UQI:
    case VOID_FTYPE_PFLOAT_V16SF_UHI:
    case VOID_FTYPE_PFLOAT_V8SF_UQI:
    case VOID_FTYPE_PFLOAT_V4SF_UQI:
    case VOID_FTYPE_PV32QI_V32HI_USI:
    case VOID_FTYPE_PV16QI_V16HI_UHI:
    case VOID_FTYPE_PV8QI_V8HI_UQI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_PCV4SF_V4SF_UQI:
    case V8SF_FTYPE_PCV8SF_V8SF_UQI:
    case V16SF_FTYPE_PCV16SF_V16SF_UHI:
    case V4SI_FTYPE_PCV4SI_V4SI_UQI:
    case V8SI_FTYPE_PCV8SI_V8SI_UQI:
    case V16SI_FTYPE_PCV16SI_V16SI_UHI:
    case V2DF_FTYPE_PCV2DF_V2DF_UQI:
    case V4DF_FTYPE_PCV4DF_V4DF_UQI:
    case V8DF_FTYPE_PCV8DF_V8DF_UQI:
    case V2DI_FTYPE_PCV2DI_V2DI_UQI:
    case V4DI_FTYPE_PCV4DI_V4DI_UQI:
    case V8DI_FTYPE_PCV8DI_V8DI_UQI:
    case V64QI_FTYPE_PCV64QI_V64QI_UDI:
    case V32HI_FTYPE_PCV32HI_V32HI_USI:
    case V32QI_FTYPE_PCV32QI_V32QI_USI:
    case V16QI_FTYPE_PCV16QI_V16QI_UHI:
    case V16HI_FTYPE_PCV16HI_V16HI_UHI:
    case V8HI_FTYPE_PCV8HI_V8HI_UQI:
      switch (icode)
	{
	/* These builtins and instructions require the memory
	   to be properly aligned.  */
	case CODE_FOR_avx512f_loadv16sf_mask:
	case CODE_FOR_avx512f_loadv16si_mask:
	case CODE_FOR_avx512f_loadv8df_mask:
	case CODE_FOR_avx512f_loadv8di_mask:
	case CODE_FOR_avx512vl_loadv8sf_mask:
	case CODE_FOR_avx512vl_loadv8si_mask:
	case CODE_FOR_avx512vl_loadv4df_mask:
	case CODE_FOR_avx512vl_loadv4di_mask:
	case CODE_FOR_avx512vl_loadv4sf_mask:
	case CODE_FOR_avx512vl_loadv4si_mask:
	case CODE_FOR_avx512vl_loadv2df_mask:
	case CODE_FOR_avx512vl_loadv2di_mask:
	case CODE_FOR_avx512bw_loadv64qi_mask:
	case CODE_FOR_avx512vl_loadv32qi_mask:
	case CODE_FOR_avx512vl_loadv16qi_mask:
	case CODE_FOR_avx512bw_loadv32hi_mask:
	case CODE_FOR_avx512vl_loadv16hi_mask:
	case CODE_FOR_avx512vl_loadv8hi_mask:
	  aligned_mem = true;
	  break;
	default:
	  break;
	}
      /* FALLTHRU */
    case V64QI_FTYPE_PCCHAR_V64QI_UDI:
    case V32QI_FTYPE_PCCHAR_V32QI_USI:
    case V16QI_FTYPE_PCCHAR_V16QI_UHI:
    case V32HI_FTYPE_PCSHORT_V32HI_USI:
    case V16HI_FTYPE_PCSHORT_V16HI_UHI:
    case V8HI_FTYPE_PCSHORT_V8HI_UQI:
    case V16SI_FTYPE_PCINT_V16SI_UHI:
    case V8SI_FTYPE_PCINT_V8SI_UQI:
    case V4SI_FTYPE_PCINT_V4SI_UQI:
    case V8DI_FTYPE_PCINT64_V8DI_UQI:
    case V4DI_FTYPE_PCINT64_V4DI_UQI:
    case V2DI_FTYPE_PCINT64_V2DI_UQI:
    case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
    case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
    case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
    case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
    case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
    case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
      nargs = 3;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));
  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  op = ix86_zero_extend_to_Pmode (op);
	  target = gen_rtx_MEM (tmode, op);
	  /* target at this point has just BITS_PER_UNIT MEM_ALIGN
	     on it.  Try to improve it using get_pointer_alignment,
	     and if the special builtin is one that requires strict
	     mode alignment, also from its GET_MODE_ALIGNMENT.
	     Failure to do so could lead to ix86_legitimate_combined_insn
	     rejecting all changes to such insns.  */
	  unsigned int align = get_pointer_alignment (arg);
	  if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
	    align = GET_MODE_ALIGNMENT (tmode);
	  if (MEM_ALIGN (target) < align)
	    set_mem_align (target, align);
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || !register_operand (target, tmode)
	  || GET_MODE (target) != tmode)
	target = gen_reg_rtx (tmode);
    }
  for (i = 0; i < nargs; i++)
    {
      machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      op = ix86_zero_extend_to_Pmode (op);
	      op = gen_rtx_MEM (mode, op);
	      /* op at this point has just BITS_PER_UNIT MEM_ALIGN
		 on it.  Try to improve it using get_pointer_alignment,
		 and if the special builtin is one that requires strict
		 mode alignment, also from its GET_MODE_ALIGNMENT.
		 Failure to do so could lead to ix86_legitimate_combined_insn
		 rejecting all changes to such insns.  */
	      unsigned int align = get_pointer_alignment (arg);
	      if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
		align = GET_MODE_ALIGNMENT (mode);
	      if (MEM_ALIGN (op) < align)
		set_mem_align (op, align);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      op = fixup_modeless_constant (op, mode);

	      if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
		op = copy_to_mode_reg (mode, op);
	      else
		{
		  op = copy_to_reg (op);
		  op = lowpart_subreg (mode, op, GET_MODE (op));
		}
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!tree_fits_uhwi_p (arg)
      || (elt = tree_to_uhwi (arg), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
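/* E.g. (illustrative): for a V4SF vector type max is 3, so a selector
   of 4 or more is rejected with the error above, while selectors 0..3
   are returned unchanged.  */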
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  machine_mode tmode = TYPE_MODE (type);
  machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
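/* E.g. (illustrative): _mm_set_pi32 (1, 2) in mmintrin.h expands to
   __builtin_ia32_vec_init_v2si (2, 1), which reaches the function above
   with tmode == V2SImode and n_elt == 2.  */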
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
/* Emit conditional move of SRC to DST with condition
   OP1 CODE OP2.  */
static void
ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
{
  rtx t;

  if (TARGET_CMOVE)
    {
      t = ix86_expand_compare (code, op1, op2);
      emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
							 src, dst)));
    }
  else
    {
      rtx_code_label *nomove = gen_label_rtx ();
      emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
			       const0_rtx, GET_MODE (op1), 1, nomove);
      emit_move_insn (dst, src);
      emit_label (nomove);
    }
}

/* Choose max of DST and SRC and put it to DST.  */
static void
ix86_emit_move_max (rtx dst, rtx src)
{
  ix86_emit_cmove (dst, src, LTU, dst, src);
}
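/* MPX bounds are kept as a [LB, ~UB] pair, i.e. the upper bound is
   stored in one's complement form; that is why the bound-merging code
   in the BND* cases below can compute both the new LB and the new UB
   with the same unsigned-max helper above (illustrative summary).  */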
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
		     machine_mode mode, int ignore)
{
  size_t i;
  enum insn_code icode, icode2;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat, pat2, insn;
  machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch (fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
	/* Make it call __cpu_indicator_init in libgcc.  */
	tree call_expr, fndecl, type;
	type = build_function_type_list (integer_type_node, NULL_TREE);
	fndecl = build_fn_decl ("__cpu_indicator_init", type);
	call_expr = build_call_expr (fndecl, 0);
	return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
	tree arg0 = CALL_EXPR_ARG (exp, 0);
	tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
	gcc_assert (fold_expr != NULL_TREE);
	return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    default:
      break;
    }
  /* Determine whether the builtin function is available under the current
     ISA.  Originally the builtin was not created if it wasn't applicable to
     the current ISA based on the command line switches.  With function
     specific options, we need to check in the context of the function making
     the call whether it is supported.  Treat AVX512VL and MMX specially.
     For other flags, if isa includes more than one ISA bit, treat those as
     requiring any of them.  For AVX512VL, require both AVX512VL and the
     non-AVX512VL ISAs.  Likewise for MMX, require both MMX and the non-MMX
     ISAs.  Similarly for 64BIT, but we shouldn't be building such builtins
     at all, -m64 is a whole TU option.  */
  if (((ix86_builtins_isa[fcode].isa
	& ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
	    | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI
	    | OPTION_MASK_ISA_VPCLMULQDQ))
       && !(ix86_builtins_isa[fcode].isa
	    & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
		| OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI
		| OPTION_MASK_ISA_VPCLMULQDQ)
	    & ix86_isa_flags))
      || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL)
	  && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL))
      || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_GFNI)
	  && !(ix86_isa_flags & OPTION_MASK_ISA_GFNI))
      || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_MMX)
	  && !(ix86_isa_flags & OPTION_MASK_ISA_MMX))
      || (ix86_builtins_isa[fcode].isa2
	  && !(ix86_builtins_isa[fcode].isa2 & ix86_isa_flags2)))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa,
				       ix86_builtins_isa[fcode].isa2, 0, 0,
				       NULL, NULL, (enum fpmath_unit) 0,
				       false);
      if (opts == NULL)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return expand_call (exp, target, ignore);
    }
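  /* E.g. (illustrative): calling an AVX512VL builtin from a function
     compiled without -mavx512vl and without a target("avx512vl")
     attribute reaches the error above with opts naming the missing
     option, roughly: '__builtin_ia32_...' needs isa option -mavx512vl.
     The exact builtin name and option string depend on the call site.  */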
  switch (fcode)
    {
    case IX86_BUILTIN_BNDMK:
      if (!target
	  || GET_MODE (target) != BNDmode
	  || !register_operand (target, BNDmode))
	target = gen_reg_rtx (BNDmode);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);

      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!register_operand (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);
      if (!register_operand (op1, Pmode))
	op1 = ix86_zero_extend_to_Pmode (op1);

      /* Builtin arg1 is size of block but instruction op1 should
	 be (size - 1).  */
      op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
				 NULL_RTX, 1, OPTAB_DIRECT);

      emit_insn (BNDmode == BND64mode
		 ? gen_bnd64_mk (target, op0, op1)
		 : gen_bnd32_mk (target, op0, op1));
      return target;

    case IX86_BUILTIN_BNDSTX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);

      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);

      if (!register_operand (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);
      if (!register_operand (op1, BNDmode))
	op1 = copy_to_mode_reg (BNDmode, op1);
      if (!register_operand (op2, Pmode))
	op2 = ix86_zero_extend_to_Pmode (op2);

      emit_insn (BNDmode == BND64mode
		 ? gen_bnd64_stx (op2, op0, op1)
		 : gen_bnd32_stx (op2, op0, op1));
      return 0;

    case IX86_BUILTIN_BNDLDX:
      if (!target
	  || GET_MODE (target) != BNDmode
	  || !register_operand (target, BNDmode))
	target = gen_reg_rtx (BNDmode);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);

      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!register_operand (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);
      if (!register_operand (op1, Pmode))
	op1 = ix86_zero_extend_to_Pmode (op1);

      emit_insn (BNDmode == BND64mode
		 ? gen_bnd64_ldx (target, op0, op1)
		 : gen_bnd32_ldx (target, op0, op1));
      return target;

    case IX86_BUILTIN_BNDCL:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);

      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!register_operand (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);
      if (!register_operand (op1, BNDmode))
	op1 = copy_to_mode_reg (BNDmode, op1);

      emit_insn (BNDmode == BND64mode
		 ? gen_bnd64_cl (op1, op0)
		 : gen_bnd32_cl (op1, op0));
      return 0;

    case IX86_BUILTIN_BNDCU:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);

      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!register_operand (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);
      if (!register_operand (op1, BNDmode))
	op1 = copy_to_mode_reg (BNDmode, op1);

      emit_insn (BNDmode == BND64mode
		 ? gen_bnd64_cu (op1, op0)
		 : gen_bnd32_cu (op1, op0));
      return 0;

    case IX86_BUILTIN_BNDRET:
      arg0 = CALL_EXPR_ARG (exp, 0);
      target = chkp_get_rtl_bounds (arg0);

      /* If no bounds were specified for returned value,
	 then use INIT bounds.  It usually happens when
	 some built-in function is expanded.  */
      if (!target)
	{
	  rtx t1 = gen_reg_rtx (Pmode);
	  rtx t2 = gen_reg_rtx (Pmode);
	  target = gen_reg_rtx (BNDmode);
	  emit_move_insn (t1, const0_rtx);
	  emit_move_insn (t2, constm1_rtx);
	  emit_insn (BNDmode == BND64mode
		     ? gen_bnd64_mk (target, t1, t2)
		     : gen_bnd32_mk (target, t1, t2));
	}

      gcc_assert (target && REG_P (target));
      return target;
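    /* In the two cases below, bounds are shuffled through a BNDmode
       stack slot because the LB and UB halves of a bound are only
       addressable as Pmode words in memory: LB at offset 0 and the
       one's complement UB at offset GET_MODE_SIZE (Pmode)
       (illustrative summary of the layout used here).  */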
    case IX86_BUILTIN_BNDNARROW:
      {
	rtx m1, m1h1, m1h2, lb, ub, t1;

	/* Return value and lb.  */
	arg0 = CALL_EXPR_ARG (exp, 0);
	/* Bounds.  */
	arg1 = CALL_EXPR_ARG (exp, 1);
	/* Size.  */
	arg2 = CALL_EXPR_ARG (exp, 2);

	lb = expand_normal (arg0);
	op1 = expand_normal (arg1);
	op2 = expand_normal (arg2);

	/* Size was passed but we need to use (size - 1) as for bndmk.  */
	op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
				   NULL_RTX, 1, OPTAB_DIRECT);

	/* Add LB to size and inverse to get UB.  */
	op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
				   op2, 1, OPTAB_DIRECT);
	ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);

	if (!register_operand (lb, Pmode))
	  lb = ix86_zero_extend_to_Pmode (lb);
	if (!register_operand (ub, Pmode))
	  ub = ix86_zero_extend_to_Pmode (ub);

	/* We need to move bounds to memory before any computations.  */
	if (MEM_P (op1))
	  m1 = op1;
	else
	  {
	    m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
	    emit_move_insn (m1, op1);
	  }

	/* Generate mem expression to be used for access to LB and UB.  */
	m1h1 = adjust_address (m1, Pmode, 0);
	m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));

	t1 = gen_reg_rtx (Pmode);

	/* Compute LB.  */
	emit_move_insn (t1, m1h1);
	ix86_emit_move_max (t1, lb);
	emit_move_insn (m1h1, t1);

	/* Compute UB.  UB is stored in 1's complement form.  Therefore
	   we also use max here.  */
	emit_move_insn (t1, m1h2);
	ix86_emit_move_max (t1, ub);
	emit_move_insn (m1h2, t1);

	op2 = gen_reg_rtx (BNDmode);
	emit_move_insn (op2, m1);

	return chkp_join_splitted_slot (lb, op2);
      }

    case IX86_BUILTIN_BNDINT:
      {
	rtx res, rh1, rh2, lb1, lb2, ub1, ub2;

	if (!target
	    || GET_MODE (target) != BNDmode
	    || !register_operand (target, BNDmode))
	  target = gen_reg_rtx (BNDmode);

	arg0 = CALL_EXPR_ARG (exp, 0);
	arg1 = CALL_EXPR_ARG (exp, 1);

	op0 = expand_normal (arg0);
	op1 = expand_normal (arg1);

	res = assign_386_stack_local (BNDmode, SLOT_TEMP);
	rh1 = adjust_address (res, Pmode, 0);
	rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));

	/* Put first bounds to temporaries.  */
	lb1 = gen_reg_rtx (Pmode);
	ub1 = gen_reg_rtx (Pmode);
	if (MEM_P (op0))
	  {
	    emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
	    emit_move_insn (ub1, adjust_address (op0, Pmode,
						 GET_MODE_SIZE (Pmode)));
	  }
	else
	  {
	    emit_move_insn (res, op0);
	    emit_move_insn (lb1, rh1);
	    emit_move_insn (ub1, rh2);
	  }

	/* Put second bounds to temporaries.  */
	lb2 = gen_reg_rtx (Pmode);
	ub2 = gen_reg_rtx (Pmode);
	if (MEM_P (op1))
	  {
	    emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
	    emit_move_insn (ub2, adjust_address (op1, Pmode,
						 GET_MODE_SIZE (Pmode)));
	  }
	else
	  {
	    emit_move_insn (res, op1);
	    emit_move_insn (lb2, rh1);
	    emit_move_insn (ub2, rh2);
	  }

	/* Compute LB.  */
	ix86_emit_move_max (lb1, lb2);
	emit_move_insn (rh1, lb1);

	/* Compute UB.  UB is stored in 1's complement form.  Therefore
	   we also use max here.  */
	ix86_emit_move_max (ub1, ub2);
	emit_move_insn (rh2, ub1);

	emit_move_insn (target, res);

	return target;
      }
    case IX86_BUILTIN_SIZEOF:
      {
	tree name;
	rtx symbol;

	if (!target
	    || GET_MODE (target) != Pmode
	    || !register_operand (target, Pmode))
	  target = gen_reg_rtx (Pmode);

	arg0 = CALL_EXPR_ARG (exp, 0);
	gcc_assert (VAR_P (arg0));

	name = DECL_ASSEMBLER_NAME (arg0);
	symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));

	emit_insn (Pmode == SImode
		   ? gen_move_size_reloc_si (target, symbol)
		   : gen_move_size_reloc_di (target, symbol));

	return target;
      }

    case IX86_BUILTIN_BNDLOWER:
      {
	rtx mem, hmem;

	if (!target
	    || GET_MODE (target) != Pmode
	    || !register_operand (target, Pmode))
	  target = gen_reg_rtx (Pmode);

	arg0 = CALL_EXPR_ARG (exp, 0);
	op0 = expand_normal (arg0);

	/* We need to move bounds to memory first.  */
	if (MEM_P (op0))
	  mem = op0;
	else
	  {
	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
	    emit_move_insn (mem, op0);
	  }

	/* Generate mem expression to access LB and load it.  */
	hmem = adjust_address (mem, Pmode, 0);
	emit_move_insn (target, hmem);

	return target;
      }

    case IX86_BUILTIN_BNDUPPER:
      {
	rtx mem, hmem, res;

	if (!target
	    || GET_MODE (target) != Pmode
	    || !register_operand (target, Pmode))
	  target = gen_reg_rtx (Pmode);

	arg0 = CALL_EXPR_ARG (exp, 0);
	op0 = expand_normal (arg0);

	/* We need to move bounds to memory first.  */
	if (MEM_P (op0))
	  mem = op0;
	else
	  {
	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
	    emit_move_insn (mem, op0);
	  }

	/* Generate mem expression to access UB.  */
	hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));

	/* We need to inverse all bits of UB.  */
	res = expand_simple_unop (Pmode, NOT, hmem, target, 1);

	if (res != target)
	  emit_move_insn (target, res);

	return target;
      }
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = ix86_zero_extend_to_Pmode (op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_CLWB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_clwb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);

      emit_insn (gen_clwb (op0));
      return 0;

    case IX86_BUILTIN_CLFLUSHOPT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_clflushopt;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);

      emit_insn (gen_clflushopt (op0));
      return 0;
    case IX86_BUILTIN_MONITOR:
    case IX86_BUILTIN_MONITORX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = ix86_zero_extend_to_Pmode (op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);

      emit_insn (fcode == IX86_BUILTIN_MONITOR
		 ? ix86_gen_monitor (op0, op1, op2)
		 : ix86_gen_monitorx (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_MWAITX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_mwaitx (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_CLZERO:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      if (!REG_P (op0))
	op0 = ix86_zero_extend_to_Pmode (op0);
      emit_insn (ix86_gen_clzero (op0));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_NANQ:
    case IX86_BUILTIN_NANSQ:
      return expand_call (exp, target, ignore);
    case IX86_BUILTIN_RDPMC:
    case IX86_BUILTIN_RDTSC:
    case IX86_BUILTIN_RDTSCP:
    case IX86_BUILTIN_XGETBV:
      op0 = gen_reg_rtx (DImode);
      op1 = gen_reg_rtx (DImode);

      if (fcode == IX86_BUILTIN_RDPMC)
	{
	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op2 = expand_normal (arg0);
	  if (!register_operand (op2, SImode))
	    op2 = copy_to_mode_reg (SImode, op2);

	  insn = (TARGET_64BIT
		  ? gen_rdpmc_rex64 (op0, op1, op2)
		  : gen_rdpmc (op0, op2));
	  emit_insn (insn);
	}
      else if (fcode == IX86_BUILTIN_XGETBV)
	{
	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op2 = expand_normal (arg0);
	  if (!register_operand (op2, SImode))
	    op2 = copy_to_mode_reg (SImode, op2);

	  insn = (TARGET_64BIT
		  ? gen_xgetbv_rex64 (op0, op1, op2)
		  : gen_xgetbv (op0, op2));
	  emit_insn (insn);
	}
      else if (fcode == IX86_BUILTIN_RDTSC)
	{
	  insn = (TARGET_64BIT
		  ? gen_rdtsc_rex64 (op0, op1)
		  : gen_rdtsc (op0));
	  emit_insn (insn);
	}
      else
	{
	  op2 = gen_reg_rtx (SImode);

	  insn = (TARGET_64BIT
		  ? gen_rdtscp_rex64 (op0, op1, op2)
		  : gen_rdtscp (op0, op2));
	  emit_insn (insn);

	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op4 = expand_normal (arg0);
	  if (!address_operand (op4, VOIDmode))
	    {
	      op4 = convert_memory_address (Pmode, op4);
	      op4 = copy_addr_to_reg (op4);
	    }
	  emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
	}

      if (target == 0)
	{
	  /* mode is VOIDmode if __builtin_rd* has been called
	     without lhs.  */
	  if (mode == VOIDmode)
	    return target;
	  target = gen_reg_rtx (mode);
	}

      if (TARGET_64BIT)
	{
	  op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
				     op1, 1, OPTAB_DIRECT);
	  op0 = expand_simple_binop (DImode, IOR, op0, op1,
				     op0, 1, OPTAB_DIRECT);
	}

      emit_move_insn (target, op0);
      return target;
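    /* Editorial note (illustration, not in the original source): on 64-bit
       targets these patterns return the low and high halves in two DImode
       registers, so the code above reassembles the usual EDX:EAX pairing:

	 uint64_t tsc = ((uint64_t) hi << 32) | lo;  */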
    case IX86_BUILTIN_FXSAVE:
    case IX86_BUILTIN_FXRSTOR:
    case IX86_BUILTIN_FXSAVE64:
    case IX86_BUILTIN_FXRSTOR64:
    case IX86_BUILTIN_FNSTENV:
    case IX86_BUILTIN_FLDENV:
      mode0 = BLKmode;
      switch (fcode)
	{
	case IX86_BUILTIN_FXSAVE:
	  icode = CODE_FOR_fxsave;
	  break;
	case IX86_BUILTIN_FXRSTOR:
	  icode = CODE_FOR_fxrstor;
	  break;
	case IX86_BUILTIN_FXSAVE64:
	  icode = CODE_FOR_fxsave64;
	  break;
	case IX86_BUILTIN_FXRSTOR64:
	  icode = CODE_FOR_fxrstor64;
	  break;
	case IX86_BUILTIN_FNSTENV:
	  icode = CODE_FOR_fnstenv;
	  break;
	case IX86_BUILTIN_FLDENV:
	  icode = CODE_FOR_fldenv;
	  break;
	default:
	  gcc_unreachable ();
	}

      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);

      if (!address_operand (op0, VOIDmode))
	{
	  op0 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op0);
	}
      op0 = gen_rtx_MEM (mode0, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
	emit_insn (pat);
      return 0;
    case IX86_BUILTIN_XSETBV:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);

      if (TARGET_64BIT)
	{
	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
				     NULL, 1, OPTAB_DIRECT);

	  op2 = gen_lowpart (SImode, op2);
	  op1 = gen_lowpart (SImode, op1);
	  if (!REG_P (op1))
	    op1 = copy_to_mode_reg (SImode, op1);
	  if (!REG_P (op2))
	    op2 = copy_to_mode_reg (SImode, op2);
	  icode = CODE_FOR_xsetbv_rex64;
	  pat = GEN_FCN (icode) (op0, op1, op2);
	}
      else
	{
	  if (!REG_P (op1))
	    op1 = copy_to_mode_reg (DImode, op1);
	  icode = CODE_FOR_xsetbv;
	  pat = GEN_FCN (icode) (op0, op1);
	}
      if (pat)
	emit_insn (pat);
      return 0;
    case IX86_BUILTIN_XSAVE:
    case IX86_BUILTIN_XRSTOR:
    case IX86_BUILTIN_XSAVE64:
    case IX86_BUILTIN_XRSTOR64:
    case IX86_BUILTIN_XSAVEOPT:
    case IX86_BUILTIN_XSAVEOPT64:
    case IX86_BUILTIN_XSAVES:
    case IX86_BUILTIN_XRSTORS:
    case IX86_BUILTIN_XSAVES64:
    case IX86_BUILTIN_XRSTORS64:
    case IX86_BUILTIN_XSAVEC:
    case IX86_BUILTIN_XSAVEC64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!address_operand (op0, VOIDmode))
	{
	  op0 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op0);
	}
      op0 = gen_rtx_MEM (BLKmode, op0);

      op1 = force_reg (DImode, op1);

      if (TARGET_64BIT)
	{
	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
				     NULL, 1, OPTAB_DIRECT);
	  switch (fcode)
	    {
	    case IX86_BUILTIN_XSAVE:
	      icode = CODE_FOR_xsave_rex64;
	      break;
	    case IX86_BUILTIN_XRSTOR:
	      icode = CODE_FOR_xrstor_rex64;
	      break;
	    case IX86_BUILTIN_XSAVE64:
	      icode = CODE_FOR_xsave64;
	      break;
	    case IX86_BUILTIN_XRSTOR64:
	      icode = CODE_FOR_xrstor64;
	      break;
	    case IX86_BUILTIN_XSAVEOPT:
	      icode = CODE_FOR_xsaveopt_rex64;
	      break;
	    case IX86_BUILTIN_XSAVEOPT64:
	      icode = CODE_FOR_xsaveopt64;
	      break;
	    case IX86_BUILTIN_XSAVES:
	      icode = CODE_FOR_xsaves_rex64;
	      break;
	    case IX86_BUILTIN_XRSTORS:
	      icode = CODE_FOR_xrstors_rex64;
	      break;
	    case IX86_BUILTIN_XSAVES64:
	      icode = CODE_FOR_xsaves64;
	      break;
	    case IX86_BUILTIN_XRSTORS64:
	      icode = CODE_FOR_xrstors64;
	      break;
	    case IX86_BUILTIN_XSAVEC:
	      icode = CODE_FOR_xsavec_rex64;
	      break;
	    case IX86_BUILTIN_XSAVEC64:
	      icode = CODE_FOR_xsavec64;
	      break;
	    default:
	      gcc_unreachable ();
	    }

	  op2 = gen_lowpart (SImode, op2);
	  op1 = gen_lowpart (SImode, op1);
	  pat = GEN_FCN (icode) (op0, op1, op2);
	}
      else
	{
	  switch (fcode)
	    {
	    case IX86_BUILTIN_XSAVE:
	      icode = CODE_FOR_xsave;
	      break;
	    case IX86_BUILTIN_XRSTOR:
	      icode = CODE_FOR_xrstor;
	      break;
	    case IX86_BUILTIN_XSAVEOPT:
	      icode = CODE_FOR_xsaveopt;
	      break;
	    case IX86_BUILTIN_XSAVES:
	      icode = CODE_FOR_xsaves;
	      break;
	    case IX86_BUILTIN_XRSTORS:
	      icode = CODE_FOR_xrstors;
	      break;
	    case IX86_BUILTIN_XSAVEC:
	      icode = CODE_FOR_xsavec;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  pat = GEN_FCN (icode) (op0, op1);
	}

      if (pat)
	emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = ix86_zero_extend_to_Pmode (op0);
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

    rdrand_step:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}

      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  if (TARGET_ZERO_EXTEND_WITH_AND
	      && optimize_function_for_speed_p (cfun))
	    {
	      op2 = force_reg (SImode, const0_rtx);

	      emit_insn (gen_movstricthi
			 (gen_lowpart (HImode, op2), op0));
	    }
	  else
	    {
	      op2 = gen_reg_rtx (SImode);

	      emit_insn (gen_zero_extendhisi2 (op2, op0));
	    }
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0
	  || !register_operand (target, SImode))
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
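    /* Usage sketch (editorial illustration, not in the original source):

	 unsigned int r;
	 if (__builtin_ia32_rdrand32_step (&r))  // expands as above
	   use (r);                              // CF=1: r is a random value

       The conditional move turns CF into the 0/1 return value; on failure
       the hardware zeroes the destination, so the extended result itself
       supplies the 0.  */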
    case IX86_BUILTIN_RDSEED16_STEP:
      icode = CODE_FOR_rdseedhi_1;
      mode0 = HImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED32_STEP:
      icode = CODE_FOR_rdseedsi_1;
      mode0 = SImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED64_STEP:
      icode = CODE_FOR_rdseeddi_1;
      mode0 = DImode;

    rdseed_step:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}

      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op2 = gen_reg_rtx (QImode);

      pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (op2, pat));

      if (target == 0
	  || !register_operand (target, SImode))
	target = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (target, op2));
      return target;
    case IX86_BUILTIN_SBB32:
      icode = CODE_FOR_subborrowsi;
      icode2 = CODE_FOR_subborrowsi_0;
      mode0 = SImode;
      mode1 = DImode;
      mode2 = CCmode;
      goto handlecarry;

    case IX86_BUILTIN_SBB64:
      icode = CODE_FOR_subborrowdi;
      icode2 = CODE_FOR_subborrowdi_0;
      mode0 = DImode;
      mode1 = TImode;
      mode2 = CCmode;
      goto handlecarry;

    case IX86_BUILTIN_ADDCARRYX32:
      icode = CODE_FOR_addcarrysi;
      icode2 = CODE_FOR_addcarrysi_0;
      mode0 = SImode;
      mode1 = DImode;
      mode2 = CCCmode;
      goto handlecarry;

    case IX86_BUILTIN_ADDCARRYX64:
      icode = CODE_FOR_addcarrydi;
      icode2 = CODE_FOR_addcarrydi_0;
      mode0 = DImode;
      mode1 = TImode;
      mode2 = CCCmode;

    handlecarry:
      arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
      arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
      arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */

      op1 = expand_normal (arg0);
      if (!integer_zerop (arg0))
	op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));

      op2 = expand_normal (arg1);
      if (!register_operand (op2, mode0))
	op2 = copy_to_mode_reg (mode0, op2);

      op3 = expand_normal (arg2);
      if (!register_operand (op3, mode0))
	op3 = copy_to_mode_reg (mode0, op3);

      op4 = expand_normal (arg3);
      if (!address_operand (op4, VOIDmode))
	{
	  op4 = convert_memory_address (Pmode, op4);
	  op4 = copy_addr_to_reg (op4);
	}

      op0 = gen_reg_rtx (mode0);
      if (integer_zerop (arg0))
	{
	  /* If arg0 is 0, optimize right away into add or sub
	     instruction that sets CCCmode flags.  */
	  op1 = gen_rtx_REG (mode2, FLAGS_REG);
	  emit_insn (GEN_FCN (icode2) (op0, op2, op3));
	}
      else
	{
	  /* Generate CF from input operand.  */
	  emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));

	  /* Generate instruction that consumes CF.  */
	  op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
	  pat = gen_rtx_LTU (mode1, op1, const0_rtx);
	  pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
	  emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
	}

      /* Return current CF value.  */
      if (target == 0)
	target = gen_reg_rtx (QImode);

      pat = gen_rtx_LTU (QImode, op1, const0_rtx);
      emit_insn (gen_rtx_SET (target, pat));

      /* Store the result.  */
      emit_move_insn (gen_rtx_MEM (mode0, op4), op0);

      return target;
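    /* Usage sketch (editorial illustration, not in the original source):
       the ADX intrinsics from <adxintrin.h> map onto this expansion, e.g.

	 unsigned int sum;
	 unsigned char c = _addcarryx_u32 (0, a, b, &sum);

       With a literal 0 carry-in, integer_zerop (arg0) holds and the
       cheaper flag-setting add/sub pattern (icode2) is used instead of
       the carry-consuming one.  */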
    case IX86_BUILTIN_READ_FLAGS:
      emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));

      if (optimize
	  || target == NULL_RTX
	  || !nonimmediate_operand (target, word_mode)
	  || GET_MODE (target) != word_mode)
	target = gen_reg_rtx (word_mode);

      emit_insn (gen_pop (target));
      return target;

    case IX86_BUILTIN_WRITE_FLAGS:

      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      if (!general_no_elim_operand (op0, word_mode))
	op0 = copy_to_mode_reg (word_mode, op0);

      emit_insn (gen_push (op0));
      emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
      return 0;
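    /* Editorial note (illustration, not in the original source): these back
       the __builtin_ia32_readeflags_u32/u64 and writeeflags builtins, e.g.

	 unsigned long long f = __builtin_ia32_readeflags_u64 ();
	 __builtin_ia32_writeeflags_u64 (f | 1);  // set CF (bit 0)

       implemented as pushf/pop and push/popf sequences.  */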
    case IX86_BUILTIN_KTESTC8:
      icode = CODE_FOR_ktestqi;
      mode3 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KTESTZ8:
      icode = CODE_FOR_ktestqi;
      mode3 = CCZmode;
      goto kortest;

    case IX86_BUILTIN_KTESTC16:
      icode = CODE_FOR_ktesthi;
      mode3 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KTESTZ16:
      icode = CODE_FOR_ktesthi;
      mode3 = CCZmode;
      goto kortest;

    case IX86_BUILTIN_KTESTC32:
      icode = CODE_FOR_ktestsi;
      mode3 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KTESTZ32:
      icode = CODE_FOR_ktestsi;
      mode3 = CCZmode;
      goto kortest;

    case IX86_BUILTIN_KTESTC64:
      icode = CODE_FOR_ktestdi;
      mode3 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KTESTZ64:
      icode = CODE_FOR_ktestdi;
      mode3 = CCZmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTC8:
      icode = CODE_FOR_kortestqi;
      mode3 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTZ8:
      icode = CODE_FOR_kortestqi;
      mode3 = CCZmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTC16:
      icode = CODE_FOR_kortesthi;
      mode3 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTZ16:
      icode = CODE_FOR_kortesthi;
      mode3 = CCZmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTC32:
      icode = CODE_FOR_kortestsi;
      mode3 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTZ32:
      icode = CODE_FOR_kortestsi;
      mode3 = CCZmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTC64:
      icode = CODE_FOR_kortestdi;
      mode3 = CCCmode;
      goto kortest;

    case IX86_BUILTIN_KORTESTZ64:
      icode = CODE_FOR_kortestdi;
      mode3 = CCZmode;

    kortest:
      arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2.  */
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (GET_MODE (op0) != VOIDmode)
	op0 = force_reg (GET_MODE (op0), op0);

      op0 = gen_lowpart (mode0, op0);

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);

      if (GET_MODE (op1) != VOIDmode)
	op1 = force_reg (GET_MODE (op1), op1);

      op1 = gen_lowpart (mode1, op1);

      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      target = gen_reg_rtx (QImode);

      /* Emit kortest.  */
      emit_insn (GEN_FCN (icode) (op0, op1));
      /* And use setcc to return result from flags.  */
      ix86_expand_setcc (target, EQ,
			 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
      return target;
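    /* Semantics sketch (editorial illustration, not from the original
       source): KORTEST ORs the two mask operands and sets ZF when the
       result is all zeros and CF when it is all ones; the C and Z builtin
       variants differ only in which flag they test, hence mode3 being
       CCCmode or CCZmode above.  For example, _mm512_kortestz (k1, k2)
       returns 1 iff (k1 | k2) == 0.  */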
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV16SF:
      icode = CODE_FOR_avx512f_gathersiv16sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV8DF:
      icode = CODE_FOR_avx512f_gathersiv8df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV16SF:
      icode = CODE_FOR_avx512f_gatherdiv16sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV8DF:
      icode = CODE_FOR_avx512f_gatherdiv8df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV16SI:
      icode = CODE_FOR_avx512f_gathersiv16si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV8DI:
      icode = CODE_FOR_avx512f_gathersiv8di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV16SI:
      icode = CODE_FOR_avx512f_gatherdiv16si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV8DI:
      icode = CODE_FOR_avx512f_gatherdiv8di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTSIV8DF:
      icode = CODE_FOR_avx512f_gathersiv8df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTDIV16SF:
      icode = CODE_FOR_avx512f_gatherdiv16sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTSIV8DI:
      icode = CODE_FOR_avx512f_gathersiv8di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTDIV16SI:
      icode = CODE_FOR_avx512f_gatherdiv16si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV2DF:
      icode = CODE_FOR_avx512vl_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV4DF:
      icode = CODE_FOR_avx512vl_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV2DF:
      icode = CODE_FOR_avx512vl_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV4DF:
      icode = CODE_FOR_avx512vl_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV4SF:
      icode = CODE_FOR_avx512vl_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV8SF:
      icode = CODE_FOR_avx512vl_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV4SF:
      icode = CODE_FOR_avx512vl_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV8SF:
      icode = CODE_FOR_avx512vl_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV2DI:
      icode = CODE_FOR_avx512vl_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV4DI:
      icode = CODE_FOR_avx512vl_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV2DI:
      icode = CODE_FOR_avx512vl_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV4DI:
      icode = CODE_FOR_avx512vl_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV4SI:
      icode = CODE_FOR_avx512vl_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3SIV8SI:
      icode = CODE_FOR_avx512vl_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV4SI:
      icode = CODE_FOR_avx512vl_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3DIV8SI:
      icode = CODE_FOR_avx512vl_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTSIV4DF:
      icode = CODE_FOR_avx512vl_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTDIV8SF:
      icode = CODE_FOR_avx512vl_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTSIV4DI:
      icode = CODE_FOR_avx512vl_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHER3ALTDIV8SI:
      icode = CODE_FOR_avx512vl_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_SCATTERSIV16SF:
      icode = CODE_FOR_avx512f_scattersiv16sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV8DF:
      icode = CODE_FOR_avx512f_scattersiv8df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV16SF:
      icode = CODE_FOR_avx512f_scatterdiv16sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV8DF:
      icode = CODE_FOR_avx512f_scatterdiv8df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV16SI:
      icode = CODE_FOR_avx512f_scattersiv16si;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV8DI:
      icode = CODE_FOR_avx512f_scattersiv8di;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV16SI:
      icode = CODE_FOR_avx512f_scatterdiv16si;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV8DI:
      icode = CODE_FOR_avx512f_scatterdiv8di;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV8SF:
      icode = CODE_FOR_avx512vl_scattersiv8sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV4SF:
      icode = CODE_FOR_avx512vl_scattersiv4sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV4DF:
      icode = CODE_FOR_avx512vl_scattersiv4df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV2DF:
      icode = CODE_FOR_avx512vl_scattersiv2df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV8SF:
      icode = CODE_FOR_avx512vl_scatterdiv8sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV4SF:
      icode = CODE_FOR_avx512vl_scatterdiv4sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV4DF:
      icode = CODE_FOR_avx512vl_scatterdiv4df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV2DF:
      icode = CODE_FOR_avx512vl_scatterdiv2df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV8SI:
      icode = CODE_FOR_avx512vl_scattersiv8si;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV4SI:
      icode = CODE_FOR_avx512vl_scattersiv4si;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV4DI:
      icode = CODE_FOR_avx512vl_scattersiv4di;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERSIV2DI:
      icode = CODE_FOR_avx512vl_scattersiv2di;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV8SI:
      icode = CODE_FOR_avx512vl_scatterdiv8si;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV4SI:
      icode = CODE_FOR_avx512vl_scatterdiv4si;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV4DI:
      icode = CODE_FOR_avx512vl_scatterdiv4di;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERDIV2DI:
      icode = CODE_FOR_avx512vl_scatterdiv2di;
      goto scatter_gen;
    case IX86_BUILTIN_GATHERPFDPD:
      icode = CODE_FOR_avx512pf_gatherpfv8sidf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERALTSIV8DF:
      icode = CODE_FOR_avx512f_scattersiv8df;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERALTDIV16SF:
      icode = CODE_FOR_avx512f_scatterdiv16sf;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERALTSIV8DI:
      icode = CODE_FOR_avx512f_scattersiv8di;
      goto scatter_gen;
    case IX86_BUILTIN_SCATTERALTDIV16SI:
      icode = CODE_FOR_avx512f_scatterdiv16si;
      goto scatter_gen;
    case IX86_BUILTIN_GATHERPFDPS:
      icode = CODE_FOR_avx512pf_gatherpfv16sisf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_GATHERPFQPD:
      icode = CODE_FOR_avx512pf_gatherpfv8didf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_GATHERPFQPS:
      icode = CODE_FOR_avx512pf_gatherpfv8disf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERPFDPD:
      icode = CODE_FOR_avx512pf_scatterpfv8sidf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERPFDPS:
      icode = CODE_FOR_avx512pf_scatterpfv16sisf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERPFQPD:
      icode = CODE_FOR_avx512pf_scatterpfv8didf;
      goto vec_prefetch_gen;
    case IX86_BUILTIN_SCATTERPFQPS:
      icode = CODE_FOR_avx512pf_scatterpfv8disf;
      goto vec_prefetch_gen;
    gather_gen:
      rtx half;
      rtx (*gen) (rtx, rtx);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
	  || GET_MODE (target) != insn_data[icode].operand[0].mode
	  || !insn_data[icode].operand[0].predicate (target,
						     GET_MODE (target)))
	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
	subtarget = target;

      switch (fcode)
	{
	case IX86_BUILTIN_GATHER3ALTSIV8DF:
	case IX86_BUILTIN_GATHER3ALTSIV8DI:
	  half = gen_reg_rtx (V8SImode);
	  if (!nonimmediate_operand (op2, V16SImode))
	    op2 = copy_to_mode_reg (V16SImode, op2);
	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
	  op2 = half;
	  break;
	case IX86_BUILTIN_GATHER3ALTSIV4DF:
	case IX86_BUILTIN_GATHER3ALTSIV4DI:
	case IX86_BUILTIN_GATHERALTSIV4DF:
	case IX86_BUILTIN_GATHERALTSIV4DI:
	  half = gen_reg_rtx (V4SImode);
	  if (!nonimmediate_operand (op2, V8SImode))
	    op2 = copy_to_mode_reg (V8SImode, op2);
	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
	  op2 = half;
	  break;
	case IX86_BUILTIN_GATHER3ALTDIV16SF:
	case IX86_BUILTIN_GATHER3ALTDIV16SI:
	  half = gen_reg_rtx (mode0);
	  if (mode0 == V8SFmode)
	    gen = gen_vec_extract_lo_v16sf;
	  else
	    gen = gen_vec_extract_lo_v16si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (GET_MODE (op3) != VOIDmode)
	    {
	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	      emit_insn (gen (half, op3));
	      op3 = half;
	    }
	  break;
	case IX86_BUILTIN_GATHER3ALTDIV8SF:
	case IX86_BUILTIN_GATHER3ALTDIV8SI:
	case IX86_BUILTIN_GATHERALTDIV8SF:
	case IX86_BUILTIN_GATHERALTDIV8SI:
	  half = gen_reg_rtx (mode0);
	  if (mode0 == V4SFmode)
	    gen = gen_vec_extract_lo_v8sf;
	  else
	    gen = gen_vec_extract_lo_v8si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (GET_MODE (op3) != VOIDmode)
	    {
	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	      emit_insn (gen (half, op3));
	      op3 = half;
	    }
	  break;
	default:
	  break;
	}

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      op1 = ix86_zero_extend_to_Pmode (op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
	op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);

      op3 = fixup_modeless_constant (op3, mode3);

      if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
	{
	  if (!insn_data[icode].operand[4].predicate (op3, mode3))
	    op3 = copy_to_mode_reg (mode3, op3);
	}
      else
	{
	  op3 = copy_to_reg (op3);
	  op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
	}
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
	{
	  error ("the last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}

      /* Optimize.  If mask is known to have all high bits set,
	 replace op0 with pc_rtx to signal that the instruction
	 overwrites the whole destination and doesn't use its
	 previous contents.  */
      if (optimize)
	{
	  if (TREE_CODE (arg3) == INTEGER_CST)
	    {
	      if (integer_all_onesp (arg3))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == VECTOR_CST)
	    {
	      unsigned int negative = 0;
	      for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
		{
		  tree cst = VECTOR_CST_ELT (arg3, i);
		  if (TREE_CODE (cst) == INTEGER_CST
		      && tree_int_cst_sign_bit (cst))
		    negative++;
		  else if (TREE_CODE (cst) == REAL_CST
			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
		    negative++;
		}
	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == SSA_NAME
		   && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
	    {
	      /* Recognize also when mask is like:
		 __v2df src = _mm_setzero_pd ();
		 __v2df mask = _mm_cmpeq_pd (src, src);
		 or
		 __v8sf src = _mm256_setzero_ps ();
		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
		 as that is a cheaper way to load all ones into
		 a register than having to load a constant from
		 memory.  */
	      gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
	      if (is_gimple_call (def_stmt))
		{
		  tree fndecl = gimple_call_fndecl (def_stmt);
		  if (fndecl
		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
		      {
		      case IX86_BUILTIN_CMPPD:
		      case IX86_BUILTIN_CMPPS:
		      case IX86_BUILTIN_CMPPD256:
		      case IX86_BUILTIN_CMPPS256:
			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
			  break;
			/* FALLTHRU */
		      case IX86_BUILTIN_CMPEQPD:
		      case IX86_BUILTIN_CMPEQPS:
			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
			    && initializer_zerop (gimple_call_arg (def_stmt,
								   1)))
			  op0 = pc_rtx;
			break;
		      default:
			break;
		      }
		}
	    }
	}

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;
      emit_insn (pat);

      switch (fcode)
	{
	case IX86_BUILTIN_GATHER3DIV16SF:
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (V8SFmode);
	  emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
	  break;
	case IX86_BUILTIN_GATHER3DIV16SI:
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (V8SImode);
	  emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
	  break;
	case IX86_BUILTIN_GATHER3DIV8SF:
	case IX86_BUILTIN_GATHERDIV8SF:
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (V4SFmode);
	  emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
	  break;
	case IX86_BUILTIN_GATHER3DIV8SI:
	case IX86_BUILTIN_GATHERDIV8SI:
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (V4SImode);
	  emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
	  break;
	default:
	  target = subtarget;
	  break;
	}
      return target;
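    /* Usage sketch (editorial illustration, not part of the original
       source): a typical user-level entry into this expansion is an AVX2
       gather such as

	 __m256d v = _mm256_i32gather_pd (base, vindex, 8);

       which loads four doubles from base + vindex[i] * 8; the masked forms
       additionally take a merge source and a mask with the semantics
       handled above.  */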
    scatter_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      /* Scatter instruction stores operand op3 to memory with
	 indices from op2 and scale from op4 under writemask op1.
	 If index operand op2 has more elements than source operand
	 op3, one needs to use only its low half.  And vice versa.  */
      switch (fcode)
	{
	case IX86_BUILTIN_SCATTERALTSIV8DF:
	case IX86_BUILTIN_SCATTERALTSIV8DI:
	  half = gen_reg_rtx (V8SImode);
	  if (!nonimmediate_operand (op2, V16SImode))
	    op2 = copy_to_mode_reg (V16SImode, op2);
	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
	  op2 = half;
	  break;
	case IX86_BUILTIN_SCATTERALTDIV16SF:
	case IX86_BUILTIN_SCATTERALTDIV16SI:
	  half = gen_reg_rtx (mode3);
	  if (mode3 == V8SFmode)
	    gen = gen_vec_extract_lo_v16sf;
	  else
	    gen = gen_vec_extract_lo_v16si;
	  if (!nonimmediate_operand (op3, GET_MODE (op3)))
	    op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	  emit_insn (gen (half, op3));
	  op3 = half;
	  break;
	default:
	  break;
	}

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));

      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      op1 = fixup_modeless_constant (op1, mode1);

      if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
	{
	  if (!insn_data[icode].operand[1].predicate (op1, mode1))
	    op1 = copy_to_mode_reg (mode1, op1);
	}
      else
	{
	  op1 = copy_to_reg (op1);
	  op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
	}

      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);

      if (!insn_data[icode].operand[3].predicate (op3, mode3))
	op3 = copy_to_mode_reg (mode3, op3);

      if (!insn_data[icode].operand[4].predicate (op4, mode4))
	{
	  error ("the last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}

      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;

      emit_insn (pat);
      return 0;
    vec_prefetch_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      op0 = fixup_modeless_constant (op0, mode0);

      if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	{
	  if (!insn_data[icode].operand[0].predicate (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);
	}
      else
	{
	  op0 = copy_to_reg (op0);
	  op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
	}

      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));

      if (!insn_data[icode].operand[2].predicate (op2, Pmode))
	op2 = copy_to_mode_reg (Pmode, op2);

      if (!insn_data[icode].operand[3].predicate (op3, mode3))
	{
	  error ("the fourth argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}

      if (!insn_data[icode].operand[4].predicate (op4, mode4))
	{
	  error ("incorrect hint operand");
	  return const0_rtx;
	}

      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;

      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	{
	  error ("the xabort's argument must be an 8-bit immediate");
	  return const0_rtx;
	}
      emit_insn (gen_xabort (op0));
      return 0;
    case IX86_BUILTIN_RSTORSSP:
    case IX86_BUILTIN_CLRSSBSY:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = (fcode == IX86_BUILTIN_RSTORSSP
	       ? CODE_FOR_rstorssp
	       : CODE_FOR_clrssbsy);
      if (!address_operand (op0, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op1);
	}
      emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
      return 0;
    case IX86_BUILTIN_WRSSD:
    case IX86_BUILTIN_WRSSQ:
    case IX86_BUILTIN_WRUSSD:
    case IX86_BUILTIN_WRUSSQ:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op1 = expand_normal (arg1);
      switch (fcode)
	{
	case IX86_BUILTIN_WRSSD:
	  icode = CODE_FOR_wrsssi;
	  mode = SImode;
	  break;
	case IX86_BUILTIN_WRSSQ:
	  icode = CODE_FOR_wrssdi;
	  mode = DImode;
	  break;
	case IX86_BUILTIN_WRUSSD:
	  icode = CODE_FOR_wrusssi;
	  mode = SImode;
	  break;
	case IX86_BUILTIN_WRUSSQ:
	  icode = CODE_FOR_wrussdi;
	  mode = DImode;
	  break;
	}
      op0 = force_reg (mode, op0);
      if (!address_operand (op1, VOIDmode))
	{
	  op2 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op2);
	}
      emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
      return 0;

    default:
      break;
    }
  if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
      && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
      return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
					       target);
    }
  if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
      && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE)
	    /* Emit a normal call if SSE isn't available.  */
	    return expand_call (exp, target, ignore);
	  /* FALLTHRU */
	default:
	  return ix86_expand_args_builtin (bdesc_args + i, exp, target);
	}
    }
  if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
      && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
      rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
      rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
      rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
      int masked = 1;
      machine_mode mode, wide_mode, nar_mode;

      nar_mode  = V4SFmode;
      mode      = V16SFmode;
      wide_mode = V64SFmode;
      fcn_mask  = gen_avx5124fmaddps_4fmaddps_mask;
      fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;

      switch (fcode)
	{
	case IX86_BUILTIN_4FMAPS:
	  fcn = gen_avx5124fmaddps_4fmaddps;
	  masked = 0;
	  goto v4fma_expand;

	case IX86_BUILTIN_4DPWSSD:
	  nar_mode  = V4SImode;
	  mode      = V16SImode;
	  wide_mode = V64SImode;
	  fcn = gen_avx5124vnniw_vp4dpwssd;
	  masked = 0;
	  goto v4fma_expand;

	case IX86_BUILTIN_4DPWSSDS:
	  nar_mode  = V4SImode;
	  mode      = V16SImode;
	  wide_mode = V64SImode;
	  fcn = gen_avx5124vnniw_vp4dpwssds;
	  masked = 0;
	  goto v4fma_expand;

	case IX86_BUILTIN_4FNMAPS:
	  fcn = gen_avx5124fmaddps_4fnmaddps;
	  masked = 0;
	  goto v4fma_expand;

	case IX86_BUILTIN_4FNMAPS_MASK:
	  fcn_mask  = gen_avx5124fmaddps_4fnmaddps_mask;
	  fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
	  goto v4fma_expand;

	case IX86_BUILTIN_4DPWSSD_MASK:
	  nar_mode  = V4SImode;
	  mode      = V16SImode;
	  wide_mode = V64SImode;
	  fcn_mask  = gen_avx5124vnniw_vp4dpwssd_mask;
	  fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
	  goto v4fma_expand;

	case IX86_BUILTIN_4DPWSSDS_MASK:
	  nar_mode  = V4SImode;
	  mode      = V16SImode;
	  wide_mode = V64SImode;
	  fcn_mask  = gen_avx5124vnniw_vp4dpwssds_mask;
	  fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
	  goto v4fma_expand;

	case IX86_BUILTIN_4FMAPS_MASK:
	  {
	    tree args[4];
	    rtx ops[4];
	    rtx wide_reg;
	    rtx accum;
	    rtx addr;
	    rtx mem;

	  v4fma_expand:
	    wide_reg = gen_reg_rtx (wide_mode);
	    for (i = 0; i < 4; i++)
	      {
		args[i] = CALL_EXPR_ARG (exp, i);
		ops[i] = expand_normal (args[i]);

		emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
				ops[i]);
	      }

	    accum = expand_normal (CALL_EXPR_ARG (exp, 4));
	    accum = force_reg (mode, accum);

	    addr = expand_normal (CALL_EXPR_ARG (exp, 5));
	    addr = force_reg (Pmode, addr);

	    mem = gen_rtx_MEM (nar_mode, addr);

	    target = gen_reg_rtx (mode);

	    emit_move_insn (target, accum);

	    if (! masked)
	      emit_insn (fcn (target, accum, wide_reg, mem));
	    else
	      {
		rtx merge, mask;
		merge = expand_normal (CALL_EXPR_ARG (exp, 6));

		mask = expand_normal (CALL_EXPR_ARG (exp, 7));

		if (CONST_INT_P (mask))
		  mask = fixup_modeless_constant (mask, HImode);

		mask = force_reg (HImode, mask);

		if (GET_MODE (mask) != HImode)
		  mask = gen_rtx_SUBREG (HImode, mask, 0);

		/* If merge is 0 then we're about to emit z-masked variant.  */
		if (const0_operand (merge, mode))
		  emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
		/* If merge is the same as accum then emit merge-masked variant.  */
		else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
		  {
		    merge = force_reg (mode, merge);
		    emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
		  }
		/* Merge with something unknown might happen if we z-mask w/ -O0.  */
		else
		  {
		    target = gen_reg_rtx (mode);
		    emit_move_insn (target, merge);
		    emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
		  }
	      }
	    return target;
	  }
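	/* Masking recap (editorial illustration; see avx5124fmapsintrin.h
	   for the real builtin names and argument order): a zero merge
	   operand selects the zero-masking pattern (fcn_maskz), a merge
	   operand identical to the accumulator selects merge-masking
	   (fcn_mask), and anything else is copied into the target first
	   so that lanes with a clear mask bit keep the merge value.  */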
	case IX86_BUILTIN_4FNMASS:
	  fcn = gen_avx5124fmaddps_4fnmaddss;
	  masked = 0;
	  goto s4fma_expand;

	case IX86_BUILTIN_4FMASS:
	  fcn = gen_avx5124fmaddps_4fmaddss;
	  masked = 0;
	  goto s4fma_expand;

	case IX86_BUILTIN_4FNMASS_MASK:
	  fcn_mask  = gen_avx5124fmaddps_4fnmaddss_mask;
	  fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
	  goto s4fma_expand;

	case IX86_BUILTIN_4FMASS_MASK:
	  {
	    tree args[4];
	    rtx ops[4];
	    rtx wide_reg;
	    rtx accum;
	    rtx addr;
	    rtx mem;

	    fcn_mask  = gen_avx5124fmaddps_4fmaddss_mask;
	    fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;

	  s4fma_expand:
	    mode = V4SFmode;
	    wide_reg = gen_reg_rtx (V64SFmode);
	    for (i = 0; i < 4; i++)
	      {
		rtx tmp;
		args[i] = CALL_EXPR_ARG (exp, i);
		ops[i] = expand_normal (args[i]);

		tmp = gen_reg_rtx (SFmode);
		emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));

		emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
				gen_rtx_SUBREG (V16SFmode, tmp, 0));
	      }

	    accum = expand_normal (CALL_EXPR_ARG (exp, 4));
	    accum = force_reg (V4SFmode, accum);

	    addr = expand_normal (CALL_EXPR_ARG (exp, 5));
	    addr = force_reg (Pmode, addr);

	    mem = gen_rtx_MEM (V4SFmode, addr);

	    target = gen_reg_rtx (V4SFmode);

	    emit_move_insn (target, accum);

	    if (! masked)
	      emit_insn (fcn (target, accum, wide_reg, mem));
	    else
	      {
		rtx merge, mask;
		merge = expand_normal (CALL_EXPR_ARG (exp, 6));

		mask = expand_normal (CALL_EXPR_ARG (exp, 7));

		if (CONST_INT_P (mask))
		  mask = fixup_modeless_constant (mask, QImode);

		mask = force_reg (QImode, mask);

		if (GET_MODE (mask) != QImode)
		  mask = gen_rtx_SUBREG (QImode, mask, 0);

		/* If merge is 0 then we're about to emit z-masked variant.  */
		if (const0_operand (merge, mode))
		  emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
		/* If merge is the same as accum then emit merge-masked
		   variant.  */
		else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
		  {
		    merge = force_reg (mode, merge);
		    emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
		  }
		/* Merge with something unknown might happen if we z-mask
		   w/ -O0.  */
		else
		  {
		    target = gen_reg_rtx (mode);
		    emit_move_insn (target, merge);
		    emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
		  }
	      }
	    return target;
	  }
	case IX86_BUILTIN_RDPID:
	  return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
						   target);
	default:
	  return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
	}
    }
  if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST
      && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST;
      return ix86_expand_special_args_builtin (bdesc_special_args2 + i, exp,
					       target);
    }

  if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
      && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
      return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
    }

  if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
      && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
      return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
    }

  if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
      && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
      return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
    }

  if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
      && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
      return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
    }

  if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
      && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
      const struct builtin_description *d = bdesc_multi_arg + i;
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum ix86_builtin_func_type)
					    d->flag, d->comparison);
    }

  if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
      && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
      return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
					       target);
    }

  if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
      && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
    {
      i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
      return ix86_expand_args_builtin (bdesc_cet_rdssp + i, exp,
				       target);
    }

  gcc_unreachable ();
}
/* This returns the target-specific builtin with code CODE if
   current_function_decl has visibility on this builtin, which is checked
   using isa flags.  Returns NULL_TREE otherwise.  */

static tree
ix86_get_builtin (enum ix86_builtins code)
{
  struct cl_target_option *opts;
  tree target_tree = NULL_TREE;

  /* Determine the isa flags of current_function_decl.  */

  if (current_function_decl)
    target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);

  if (target_tree == NULL)
    target_tree = target_option_default_node;

  opts = TREE_TARGET_OPTION (target_tree);

  if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
      || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
    return ix86_builtin_decl (code, true);
  else
    return NULL_TREE;
}
/* Return function decl for target specific builtin
   for given MPX builtin passed in FCODE.  */

static tree
ix86_builtin_mpx_function (unsigned fcode)
{
  switch (fcode)
    {
    case BUILT_IN_CHKP_BNDMK:
      return ix86_builtins[IX86_BUILTIN_BNDMK];

    case BUILT_IN_CHKP_BNDSTX:
      return ix86_builtins[IX86_BUILTIN_BNDSTX];

    case BUILT_IN_CHKP_BNDLDX:
      return ix86_builtins[IX86_BUILTIN_BNDLDX];

    case BUILT_IN_CHKP_BNDCL:
      return ix86_builtins[IX86_BUILTIN_BNDCL];

    case BUILT_IN_CHKP_BNDCU:
      return ix86_builtins[IX86_BUILTIN_BNDCU];

    case BUILT_IN_CHKP_BNDRET:
      return ix86_builtins[IX86_BUILTIN_BNDRET];

    case BUILT_IN_CHKP_INTERSECT:
      return ix86_builtins[IX86_BUILTIN_BNDINT];

    case BUILT_IN_CHKP_NARROW:
      return ix86_builtins[IX86_BUILTIN_BNDNARROW];

    case BUILT_IN_CHKP_SIZEOF:
      return ix86_builtins[IX86_BUILTIN_SIZEOF];

    case BUILT_IN_CHKP_EXTRACT_LOWER:
      return ix86_builtins[IX86_BUILTIN_BNDLOWER];

    case BUILT_IN_CHKP_EXTRACT_UPPER:
      return ix86_builtins[IX86_BUILTIN_BNDUPPER];

    default:
      return NULL_TREE;
    }

  gcc_unreachable ();
}
/* Helper function for ix86_load_bounds and ix86_store_bounds.

   Return an address to be used to load/store bounds for pointer
   loaded/stored in SLOT.

   SLOT_NO is an integer constant holding number of a target
   dependent special slot to be used in case SLOT is not a memory.

   SPECIAL_BASE is a pointer to be used as a base of fake address
   to access special slots in Bounds Table.  SPECIAL_BASE[-1],
   SPECIAL_BASE[-2] etc. will be used as fake pointer locations.  */

static rtx
ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
{
  rtx addr = NULL;

  /* NULL slot means we pass bounds for pointer not passed to the
     function at all.  Register slot means we pass pointer in a
     register.  In both these cases bounds are passed via Bounds
     Table.  Since we do not have actual pointer stored in memory,
     we have to use fake addresses to access Bounds Table.  We
     start with (special_base - sizeof (void*)) and decrease this
     address by pointer size to get addresses for other slots.  */
  if (!slot || REG_P (slot))
    {
      gcc_assert (CONST_INT_P (slot_no));
      addr = plus_constant (Pmode, special_base,
			    -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
    }
  /* If pointer is passed in a memory then its address is used to
     access Bounds Table.  */
  else if (MEM_P (slot))
    {
      addr = XEXP (slot, 0);
      if (!register_operand (addr, Pmode))
	addr = copy_addr_to_reg (addr);
    }
  else
    gcc_unreachable ();

  return addr;
}
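/* Worked example (editorial illustration, not from the original source):
   with 64-bit Pmode and SPECIAL_BASE being the argument pointer, slot_no 0
   yields special_base - 8, slot_no 1 yields special_base - 16, and so on:
   one fake pointer location per special slot, growing downwards.  */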
/* Expand pass uses this hook to load bounds for function parameter
   PTR passed in SLOT in case its bounds are not passed in a register.

   If SLOT is a memory, then bounds are loaded as for regular pointer
   loaded from memory.  PTR may be NULL in case SLOT is a memory.
   In such case value of PTR (if required) may be loaded from SLOT.

   If SLOT is NULL or a register then SLOT_NO is an integer constant
   holding number of the target dependent special slot which should be
   used to obtain bounds.

   Return loaded bounds.  */

static rtx
ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
{
  rtx reg = gen_reg_rtx (BNDmode);
  rtx addr;

  /* Get address to be used to access Bounds Table.  Special slots start
     at the location of return address of the current function.  */
  addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);

  /* Load pointer value from a memory if we don't have it.  */
  if (!ptr)
    {
      gcc_assert (MEM_P (slot));
      ptr = copy_addr_to_reg (slot);
    }

  if (!register_operand (ptr, Pmode))
    ptr = ix86_zero_extend_to_Pmode (ptr);

  emit_insn (BNDmode == BND64mode
	     ? gen_bnd64_ldx (reg, addr, ptr)
	     : gen_bnd32_ldx (reg, addr, ptr));

  return reg;
}
/* Expand pass uses this hook to store BOUNDS for call argument PTR
   passed in SLOT in case BOUNDS are not passed in a register.

   If SLOT is a memory, then BOUNDS are stored as for regular pointer
   stored in memory.  PTR may be NULL in case SLOT is a memory.
   In such case value of PTR (if required) may be loaded from SLOT.

   If SLOT is NULL or a register then SLOT_NO is an integer constant
   holding number of the target dependent special slot which should be
   used to store BOUNDS.  */

static void
ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
{
  rtx addr;

  /* Get address to be used to access Bounds Table.  Special slots start
     at the location of return address of a called function.  */
  addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);

  /* Load pointer value from a memory if we don't have it.  */
  if (!ptr)
    {
      gcc_assert (MEM_P (slot));
      ptr = copy_addr_to_reg (slot);
    }

  if (!register_operand (ptr, Pmode))
    ptr = ix86_zero_extend_to_Pmode (ptr);

  gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
  if (!register_operand (bounds, BNDmode))
    bounds = copy_to_mode_reg (BNDmode, bounds);

  emit_insn (BNDmode == BND64mode
	     ? gen_bnd64_stx (addr, ptr, bounds)
	     : gen_bnd32_stx (addr, ptr, bounds));
}
/* Load and return bounds returned by function in SLOT.  */

static rtx
ix86_load_returned_bounds (rtx slot)
{
  rtx res;

  gcc_assert (REG_P (slot));
  res = gen_reg_rtx (BNDmode);
  emit_move_insn (res, slot);

  return res;
}

/* Store BOUNDS returned by function into SLOT.  */

static void
ix86_store_returned_bounds (rtx slot, rtx bounds)
{
  gcc_assert (REG_P (slot));
  emit_move_insn (slot, bounds);
}
37792 /* Returns a function decl for a vectorized version of the combined function
37793 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
37794 if it is not available. */
37797 ix86_builtin_vectorized_function (unsigned int fn
, tree type_out
,
37800 machine_mode in_mode
, out_mode
;
37803 if (TREE_CODE (type_out
) != VECTOR_TYPE
37804 || TREE_CODE (type_in
) != VECTOR_TYPE
)
37807 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
37808 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
37809 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
37810 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
37815 if (out_mode
== SFmode
&& in_mode
== SFmode
)
37817 if (out_n
== 16 && in_n
== 16)
37818 return ix86_get_builtin (IX86_BUILTIN_EXP2PS
);
37825 /* The round insn does not trap on denormals. */
37826 if (flag_trapping_math
|| !TARGET_SSE4_1
)
37829 if (out_mode
== SImode
&& in_mode
== DFmode
)
37831 if (out_n
== 4 && in_n
== 2)
37832 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
);
37833 else if (out_n
== 8 && in_n
== 4)
37834 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
);
37835 else if (out_n
== 16 && in_n
== 8)
37836 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512
);
37838 if (out_mode
== SImode
&& in_mode
== SFmode
)
37840 if (out_n
== 4 && in_n
== 4)
37841 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX
);
37842 else if (out_n
== 8 && in_n
== 8)
37843 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256
);
37844 else if (out_n
== 16 && in_n
== 16)
37845 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512
);
37852 /* The round insn does not trap on denormals. */
37853 if (flag_trapping_math
|| !TARGET_SSE4_1
)
37856 if (out_mode
== SImode
&& in_mode
== DFmode
)
37858 if (out_n
== 4 && in_n
== 2)
37859 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
);
37860 else if (out_n
== 8 && in_n
== 4)
37861 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
);
37862 else if (out_n
== 16 && in_n
== 8)
37863 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512
);
37865 if (out_mode
== SImode
&& in_mode
== SFmode
)
37867 if (out_n
== 4 && in_n
== 4)
37868 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX
);
37869 else if (out_n
== 8 && in_n
== 8)
37870 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256
);
37871 else if (out_n
== 16 && in_n
== 16)
37872 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512
);
37879 if (out_mode
== SImode
&& in_mode
== DFmode
)
37881 if (out_n
	  if (out_n == 4 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
	  else if (out_n == 8 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
	  else if (out_n == 16 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
	}
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
	  else if (out_n == 16 && in_n == 16)
	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
	}
      break;

    CASE_CFN_IROUND:
    CASE_CFN_LROUND:
    CASE_CFN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_SSE4_1)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
	  else if (out_n == 8 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
	  else if (out_n == 16 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
	}
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
	  else if (out_n == 16 && in_n == 16)
	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
	}
      break;

    CASE_CFN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_SSE4_1)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
	}
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
	  else if (out_n == 16 && in_n == 16)
	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
	}
      break;

    CASE_CFN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_SSE4_1)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
	}
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
	  else if (out_n == 16 && in_n == 16)
	    return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
	}
      break;

    CASE_CFN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_SSE4_1)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
	}
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
	  else if (out_n == 16 && in_n == 16)
	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
	}
      break;

    CASE_CFN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_SSE4_1)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_RINTPD);
	  else if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
	}
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_RINTPS);
	  else if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
	}
      break;

    CASE_CFN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
	}
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
	  if (out_n == 8 && in_n == 8)
	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
	}
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler (combined_fn (fn), type_out, type_in);

  return NULL_TREE;
}
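/* A minimal stand-alone sketch of the dispatch shape used above
   (hypothetical helper, not part of GCC): the hook keys only on the
   element modes and element counts of the two vector types, picking
   the 128-, 256- or 512-bit variant of one conversion insn.  */

static const char *
example_pick_float_to_int_insn (int out_n, int in_n)
{
  /* Mirrors the SFmode/SImode arm of the CVTPS2DQ dispatch.  */
  if (out_n == 4 && in_n == 4)
    return "cvtps2dq";		/* 128-bit SSE2 variant */
  if (out_n == 8 && in_n == 8)
    return "vcvtps2dq";		/* 256-bit AVX variant */
  if (out_n == 16 && in_n == 16)
    return "vcvtps2dq";		/* 512-bit AVX-512 variant */
  return 0;			/* not vectorizable this way */
}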
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      if ((el_mode != DFmode || n != 2)
	  && (el_mode != SFmode || n != 4))
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
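/* Stand-alone sketch of the SVML mangling above (hypothetical helper,
   for illustration): BNAME is the scalar builtin name, so skipping the
   10-character "__builtin_" prefix turns "__builtin_sinf" into "sinf".
   For n == 4, "vmls" + "sinf" gives "vmlssinf", the trailing 'f' is
   replaced by the width digit to give "vmlssin4", and name[4] &= ~0x20
   uppercases the first letter of the math function: "vmlsSin4".  */

static void
example_svml_mangle (char name[20], const char *bname, int n)
{
  if (n == 4)
    {
      sprintf (name, "vmls%s", bname + 10);	/* V4SF variant */
      name[strlen (name) - 1] = '4';		/* 'f' -> width digit */
    }
  else
    sprintf (name, "vmld%s2", bname + 10);	/* V2DF variant */
  name[4] &= ~0x20;				/* ASCII uppercase trick */
}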
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_SIN:
    CASE_CFN_COS:
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG2:
    CASE_CFN_LOG10:
      if (el_mode == DFmode && n == 2)
	{
	  name[4] = 'd';
	  name[5] = '2';
	}
      else if (el_mode == SFmode && n == 4)
	{
	  name[4] = 's';
	  name[5] = '4';
	}
      else
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
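/* The ACML mangling above as a stand-alone sketch (hypothetical
   helper): the "__vr.._" template gets its dots patched to "d2" (V2DF)
   or "s4" (V4SF), then the scalar builtin name minus "__builtin_" is
   appended at offset 7, e.g. "__vrd2_sin" and "__vrs4_sinf".  */

static void
example_acml_mangle (char name[20], const char *bname, bool v2df)
{
  strcpy (name, "__vr.._");
  name[4] = v2df ? 'd' : 's';
  name[5] = v2df ? '2' : '4';
  sprintf (name + 7, "%s", bname + 10);	/* skip "__builtin_" */
}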
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
			       const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case E_V2DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
      else
	code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
      else
	code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
      else
	code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
      else
	code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
      else
	code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
      else
	code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
      else
	code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
      else
	code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    case E_V8DFmode:
      if (TARGET_AVX512F)
	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
      else
	return NULL_TREE;
      break;
    case E_V8DImode:
      if (TARGET_AVX512F)
	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
      else
	return NULL_TREE;
      break;
    case E_V16SFmode:
      if (TARGET_AVX512F)
	code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
      else
	return NULL_TREE;
      break;
    case E_V16SImode:
      if (TARGET_AVX512F)
	code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
      else
	return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_get_builtin (code);
}
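/* The scale test above accepts exactly 1, 2, 4 and 8: positive, at
   most 8, and a power of two.  Stand-alone sketch (illustrative,
   hypothetical helper):  */

static bool
example_valid_gather_scale_p (int scale)
{
  /* scale & (scale - 1) clears the lowest set bit; the result is zero
     only for powers of two.  */
  return scale > 0 && scale <= 8 && (scale & (scale - 1)) == 0;
}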
/* Returns a decl of a function that implements scatter store with
   register type VECTYPE and index type INDEX_TYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
				const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (!TARGET_AVX512F)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
/* Return true if it is safe to use the rsqrt optabs to optimize
   1.0/sqrt.  */

static bool
use_rsqrt_p ()
{
  return (TARGET_SSE_MATH
	  && flag_finite_math_only
	  && !flag_trapping_math
	  && flag_unsafe_math_optimizations);
}

/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (tree fndecl)
{
  switch (DECL_FUNCTION_CODE (fndecl))
    {
      /* Vectorized version of sqrt to rsqrt conversion.  */
    case IX86_BUILTIN_SQRTPS_NR:
      return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);

    case IX86_BUILTIN_SQRTPS_NR256:
      return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);

    default:
      return NULL_TREE;
    }
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case E_V8DFmode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
      for (i = 4; i < 6; ++i)
	{
	  if (ipar[i] < 4 || ipar[i] >= 6)
	    return 0;
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  if (ipar[i] < 6)
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case E_V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case E_V16SFmode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case E_V8SFmode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
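/* Worked example for the V4DF arm above (illustrative sketch, not
   used by GCC): one mask bit per element, each element confined to
   its 128-bit lane.  The parallel (1 0 3 2), swapping the pair in
   each lane, packs to imm8 0b0101 and is reported as 0x5 + 1.  */

static unsigned
example_v4df_vpermil_mask (const unsigned char ipar[4])
{
  unsigned mask = 0, i;
  for (i = 0; i < 2; ++i)
    mask |= (ipar[i] & 1) << i;		/* low lane selects 0..1 */
  for (i = 2; i < 4; ++i)
    mask |= (ipar[i] - 2) << i;		/* high lane selects 2..3 */
  return mask + 1;			/* non-zero marks success */
}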
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
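/* Worked example (illustrative sketch): for V4DF, nelt2 == 2, so each
   output half must be a run of consecutive elements starting at a
   multiple of 2.  The parallel (4 5 0 1) -- low half of operand 2,
   then low half of operand 1 -- gives selector 2 for slot 0 and 0 for
   slot 1, packing to imm8 0x02, reported as 0x02 + 1.  */

static unsigned
example_v4df_vperm2f128_mask (const unsigned char ipar[4])
{
  unsigned mask = 0, i;
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * 2];		/* first element of each half */
      if (e % 2)
	return 0;			/* not aligned to a half */
      e /= 2;
      mask |= e << (i * 4);		/* one selector nibble per half */
    }
  return mask + 1;
}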
/* Return a register priority for hard reg REGNO.  */

static int
ix86_register_priority (int hard_regno)
{
  /* ebp and r13 as the base always wants a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
  if (hard_regno == R12_REG || hard_regno == R13_REG)
    return 0;
  if (hard_regno == BP_REG)
    return 1;
  /* New x86-64 int registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
    return 2;
  /* New x86-64 SSE registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
    return 2;
  /* Usage of AX register results in smaller code.  Prefer it.  */
  if (hard_regno == AX_REG)
    return 4;
  return 3;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
	  || MAYBE_SSE_CLASS_P (regclass)
	  || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (CONST_DOUBLE_P (x))
    {
      /* General regs can load everything.  */
      if (INTEGER_CLASS_P (regclass))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (IS_STACK_MODE (mode)
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to FP regs.  */
	  if (FLOAT_CLASS_P (regclass))
	    return FLOAT_REGS;
	  else if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  else if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	}

      return NO_REGS;
    }

  /* Prefer SSE regs only, if we can use them for math.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (Q_CLASS_P (regclass))
	return regclass;
      else if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      else
	return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;

  if (IS_STACK_MODE (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
	   && INTEGER_CLASS_P (rclass)
	   && MAYBE_NON_Q_CLASS_P (rclass))
	  || (!TARGET_AVX512DQ
	      && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;

      return NO_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
	  (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
				       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
	   (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
      case BND_REGS:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between registers from different register sets
   (e.g. FP and integer), we may need a memory location.

   The function can't work reliably when one of the CLASSES is a class
   containing registers from multiple sets.  We avoid this by never combining
   different sets in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.

   To optimize register_move_cost performance, define inline variant.  */

static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
				reg_class_t class2, int strict)
{
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
      && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */

static bool
ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
			      reg_class_t class2)
{
  return inline_secondary_memory_needed (mode, class1, class2, true);
}

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens integral modes to BITS_PER_WORD.
   There is no need to emit full 64 bit move on 64 bit targets
   for integral modes that can be moved using 32 bit move.  */

static machine_mode
ix86_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();

  return mode;
}
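/* Illustrative sketch (hypothetical helper, not part of GCC): the hook
   above widens narrow integral spill slots to 32 bits, since a 32-bit
   move exists in every register file and avoids partial writes.  */

static int
example_secondary_mem_bits (int mode_bits, bool integral_p)
{
  return (mode_bits < 32 && integral_p) ? 32 : mode_bits;
}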
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
ix86_can_change_mode_class (machine_mode from, machine_mode to,
			    reg_class_t regclass)
{
  if (from == to)
    return true;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return false;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return false;
    }

  return true;
}

/* Return index of MODE in the sse load/store tables.  */

static inline int
sse_store_index (machine_mode mode)
{
  switch (GET_MODE_SIZE (mode))
    {
      case 4:
	return 0;
      case 8:
	return 1;
      case 16:
	return 2;
      case 32:
	return 3;
      case 64:
	return 4;
      default:
	return -1;
    }
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	  case E_SFmode:
	    index = 0;
	    break;
	  case E_DFmode:
	    index = 1;
	    break;
	  case E_XFmode:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index = sse_store_index (mode);
      if (index == -1)
	return 100;
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
	if (Q_CLASS_P (regclass) || TARGET_64BIT)
	  {
	    if (!in)
	      return ix86_cost->int_store[0];
	    if (TARGET_PARTIAL_REG_DEPENDENCY
		&& optimize_function_for_speed_p (cfun))
	      cost = ix86_cost->movzbl_load;
	    else
	      cost = ix86_cost->int_load[0];
	    if (in == 2)
	      return MAX (cost, ix86_cost->int_store[0]);
	    return cost;
	  }
	else
	  {
	    if (in == 2)
	      return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	    if (in)
	      return ix86_cost->movzbl_load;
	    else
	      return ix86_cost->int_store[0] + 4;
	  }
	break;
      case 2:
	if (in == 2)
	  return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
	if (mode == TFmode)
	  mode = XFmode;
	if (in == 2)
	  cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
	else if (in)
	  cost = ix86_cost->int_load[2];
	else
	  cost = ix86_cost->int_store[2];
	return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
}

static int
ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass,
				  in ? 1 : 0);
}
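/* Worked example (illustrative sketch, not used by GCC): on ia32 a
   DImode GPR<->memory move falls through to the default arm above,
   where the 32-bit move cost is scaled by CEIL (8, 4) == 2.  */

static int
example_dimode_move_cost_ia32 (int int_load2)
{
  const int mode_size = 8, units_per_word = 4;
  return int_load2 * ((mode_size + units_per_word - 1) / units_per_word);
}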
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (mode, class1, class2, false))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
	  && TARGET_MEMORY_MISMATCH_STALL
	  && targetm.class_max_nregs (class1, mode)
	     > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
		? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    {
      if (GET_MODE_BITSIZE (mode) <= 128)
	return ix86_cost->xmm_move;
      if (GET_MODE_BITSIZE (mode) <= 256)
	return ix86_cost->ymm_move;
      return ix86_cost->zmm_move;
    }
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   words of a value of mode MODE but can be less for certain modes in
   special long registers.

   Actually there are no two word move instructions for consecutive
   registers.  And only registers 0-3 may have mov byte instructions
   applied to them.  */

static unsigned int
ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (GENERAL_REGNO_P (regno))
    {
      if (mode == XFmode)
	return TARGET_64BIT ? 2 : 3;
      if (mode == XCmode)
	return TARGET_64BIT ? 4 : 6;
      return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  if (COMPLEX_MODE_P (mode))
    return 2;
  if (mode == V64SFmode || mode == V64SImode)
    return 4;
  return 1;
}
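/* Worked example (illustrative): by default XFmode occupies 12 bytes
   on ia32 and 16 on x86-64, so the special cases above give 3 and 2
   general registers; other modes use the generic CEIL, sketched as a
   hypothetical helper:  */

static unsigned
example_gpr_nregs (unsigned mode_size, unsigned units_per_word)
{
  return (mode_size + units_per_word - 1) / units_per_word;  /* CEIL */
}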
/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    return (VALID_MASK_REG_MODE (mode)
	    || (TARGET_AVX512BW
		&& VALID_MASK_AVX512BW_MODE (mode)));
  if (BND_REGNO_P (regno))
    return VALID_BND_REG_MODE (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && (mode == XImode
	      || VALID_AVX512F_REG_MODE (mode)
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* For AVX-5124FMAPS allow V64SFmode for special regnos.  */
      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
	  && MOD4_SSE_REGNO_P (regno)
	  && mode == V64SFmode)
	return true;

      /* For AVX-5124VNNIW allow V64SImode for special regnos.  */
      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
	  && MOD4_SSE_REGNO_P (regno)
	  && mode == V64SImode)
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (mode == OImode
	      || mode == TImode
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}

/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   saves SSE registers across calls is Win64 (thus no need to check the
   current ABI here), and with AVX enabled Win64 only guarantees that
   the low 16 bytes are saved.  */

static bool
ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
  return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (machine_mode mode)
{
  switch (mode)
    {
    case E_HImode:
    case E_SImode:
      return true;

    case E_QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case E_DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Implement TARGET_MODES_TIEABLE_P.

   Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
}
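/* Worked example (illustrative): with TARGET_SSE2, a V2DFmode copy
   sets units to GET_MODE_SIZE (V2DFmode) == 16 and costs
   CEIL (16, 16) == 1 insn; with only word-sized moves the same value
   would cost CEIL (16, 4) == 4 insns on ia32.  Sketch of the piece
   count (hypothetical helper):  */

static int
example_reg_reg_pieces (int mode_size, int units)
{
  return (mode_size + units - 1) / units;  /* moves needed */
}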
/* Return cost of vector operation in MODE given that scalar version has
   COST.  If PARALLEL is true assume that CPU has more than one unit
   performing the operation.  */

static int
ix86_vec_cost (machine_mode mode, int cost, bool parallel)
{
  if (!VECTOR_MODE_P (mode))
    return cost;

  if (!parallel)
    return cost * GET_MODE_NUNITS (mode);
  if (GET_MODE_BITSIZE (mode) == 128
      && TARGET_SSE_SPLIT_REGS)
    return cost * 2;
  if (GET_MODE_BITSIZE (mode) > 128
      && TARGET_AVX128_OPTIMAL)
    return cost * GET_MODE_BITSIZE (mode) / 128;
  return cost;
}
39461 ix86_multiplication_cost (const struct processor_costs
*cost
,
39462 enum machine_mode mode
)
39464 machine_mode inner_mode
= mode
;
39465 if (VECTOR_MODE_P (mode
))
39466 inner_mode
= GET_MODE_INNER (mode
);
39468 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
39469 return inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
;
39470 else if (X87_FLOAT_MODE_P (mode
))
39472 else if (FLOAT_MODE_P (mode
))
39473 return ix86_vec_cost (mode
,
39474 inner_mode
== DFmode
39475 ? cost
->mulsd
: cost
->mulss
, true);
39476 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
39478 /* V*QImode is emulated with 7-13 insns. */
39479 if (mode
== V16QImode
|| mode
== V32QImode
)
39482 if (TARGET_XOP
&& mode
== V16QImode
)
39484 else if (TARGET_SSSE3
)
39486 return ix86_vec_cost (mode
,
39487 cost
->mulss
* 2 + cost
->sse_op
* extra
,
39490 /* V*DImode is emulated with 5-8 insns. */
39491 else if (mode
== V2DImode
|| mode
== V4DImode
)
39493 if (TARGET_XOP
&& mode
== V2DImode
)
39494 return ix86_vec_cost (mode
,
39495 cost
->mulss
* 2 + cost
->sse_op
* 3,
39498 return ix86_vec_cost (mode
,
39499 cost
->mulss
* 3 + cost
->sse_op
* 5,
39502 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
39503 insns, including two PMULUDQ. */
39504 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
39505 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 5,
39508 return ix86_vec_cost (mode
, cost
->mulss
, true);
39511 return (cost
->mult_init
[MODE_INDEX (mode
)] + cost
->mult_bit
* 7);
39514 /* Return cost of multiplication in MODE. */
39517 ix86_division_cost (const struct processor_costs
*cost
,
39518 enum machine_mode mode
)
39520 machine_mode inner_mode
= mode
;
39521 if (VECTOR_MODE_P (mode
))
39522 inner_mode
= GET_MODE_INNER (mode
);
39524 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
39525 return inner_mode
== DFmode
? cost
->divsd
: cost
->divss
;
39526 else if (X87_FLOAT_MODE_P (mode
))
39528 else if (FLOAT_MODE_P (mode
))
39529 return ix86_vec_cost (mode
,
39530 inner_mode
== DFmode
? cost
->divsd
: cost
->divss
,
39533 return cost
->divide
[MODE_INDEX (mode
)];
/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specify in op1 is result of and and SHIFT_AND_TRUNCATE
   if op1 is a result of subreg.

   SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */

static int
ix86_shift_rotate_cost (const struct processor_costs *cost,
			enum machine_mode mode, bool constant_op1,
			HOST_WIDE_INT op1_val,
			bool speed,
			bool and_in_op1,
			bool shift_and_truncate,
			bool *skip_op0, bool *skip_op1)
{
  if (skip_op0)
    *skip_op0 = *skip_op1 = false;
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* V*QImode is emulated with 1-11 insns.  */
      if (mode == V16QImode || mode == V32QImode)
	{
	  int count = 11;
	  if (TARGET_XOP && mode == V16QImode)
	    {
	      /* For XOP we use vpshab, which requires a broadcast of the
		 value to the variable shift insn.  For constants this
		 means a V16Q const in mem; even when we can perform the
		 shift with one insn set the cost to prefer paddb.  */
	      /* ... (XOP-specific cost, an expression based on
		 (GET_MODE_UNIT_SIZE (mode)), elided in this extract) ...  */
	      return ix86_vec_cost (mode, cost->sse_op * count, true);
	    }
	  else if (TARGET_SSSE3)
	    count = 7;
	  return ix86_vec_cost (mode, cost->sse_op * count, true);
	}
      return ix86_vec_cost (mode, cost->sse_op, true);
    }

  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    {
      if (constant_op1)
	{
	  if (op1_val > 32)
	    return cost->shift_const + COSTS_N_INSNS (2);
	  else
	    return cost->shift_const * 2;
	}
      else
	{
	  if (and_in_op1)
	    return cost->shift_var * 2;
	  else
	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
	}
    }
  else
    {
      if (constant_op1)
	return cost->shift_const;
      else if (shift_and_truncate)
	{
	  if (skip_op0)
	    *skip_op0 = *skip_op1 = true;
	  /* Return the cost after shift-and truncation.  */
	  return cost->shift_var;
	}
      else
	return cost->shift_var;
    }
  return cost->shift_const;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
		int *total, bool speed)
{
  rtx mask;
  enum rtx_code code = GET_CODE (x);
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
  int src_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}

      if (register_operand (SET_SRC (x), VOIDmode))
	/* Avoid potentially incorrect high cost from rtx_costs
	   for non-tieable SUBREGs.  */
	src_cost = 0;
      else
	{
	  src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);

	  if (CONSTANT_P (SET_SRC (x)))
	    /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
	       a small value, possibly zero for cheap constants.  */
	    src_cost += COSTS_N_INSNS (1);
	}

      *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
      return true;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && !(TARGET_64BIT
		    && (GET_CODE (x) == LABEL_REF
			|| (GET_CODE (x) == SYMBOL_REF
			    && SYMBOL_REF_LOCAL_P (x))))
	       /* Use 0 cost for CONST to improve its propagation.  */
	       && (TARGET_64BIT || GET_CODE (x) != CONST))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (IS_STACK_MODE (mode))
	switch (standard_80387_constant_p (x))
	  {
	  case -1:
	  case 0:
	    break;
	  case 1: /* 0.0 */
	    *total = 1;
	    return true;
	  default: /* Other constants */
	    *total = 2;
	    return true;
	  }
      /* FALLTHRU */

    case CONST_VECTOR:
      switch (standard_sse_constant_p (x, mode))
	{
	case 0:
	  break;
	case 1:  /* 0: xor eliminates false dependency */
	  *total = 0;
	  return true;
	default: /* -1: cmp contains false dependency */
	  *total = 1;
	  return true;
	}
      /* FALLTHRU */

    case CONST_WIDE_INT:
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (!TARGET_64BIT && flag_pic)
		+ (GET_MODE_SIZE (mode) <= 4
		   ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	  && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      {
	bool skip_op0, skip_op1;
	*total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
					 CONST_INT_P (XEXP (x, 1))
					   ? INTVAL (XEXP (x, 1)) : -1,
					 speed,
					 GET_CODE (XEXP (x, 1)) == AND,
					 SUBREG_P (XEXP (x, 1))
					 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
					 &skip_op0, &skip_op1);
	if (skip_op0 || skip_op1)
	  {
	    if (!skip_op0)
	      *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	    if (!skip_op1)
	      *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
	    return true;
	  }
	return false;
      }

    case FMA:
      {
	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);

	*total = ix86_vec_cost (mode,
				mode == SFmode ? cost->fmass : cost->fmasd,
				true);
	*total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	rtx sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, mode, outer_code, opno, speed)
		    + rtx_cost (op1, mode, outer_code, opno, speed));

	  return true;
	}
      *total = ix86_multiplication_cost (cost, mode);
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = ix86_division_cost (cost, mode);
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), mode,
				      outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), mode,
				      outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      /* Add with carry, ignore the cost of adding a carry flag.  */
	      if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
		*total = cost->add;
	      else
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				      outer_code, opno, speed);
		}

	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), mode,
				  outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	  && GET_CODE (XEXP (x, 0)) == MINUS
	  && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
	{
	  *total = cost->add;
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
			      outer_code, opno, speed);
	  *total += rtx_cost (XEXP (x, 1), mode,
			      outer_code, opno, speed);
	  return true;
	}

      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  *total = cost->addss;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  *total = ix86_vec_cost (mode, cost->addss, true);
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  *total = cost->sse_op;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  *total = ix86_vec_cost (mode, cost->sse_op, true);
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	*total = ix86_vec_cost (mode, cost->sse_op, true);
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
				opno, speed)
		    + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
	  return true;
	}

      /* The embedded comparison operand is completely free.  */
      if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
	  && XEXP (x, 1) == const0_rtx)
	*total = 0;

      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      else
	*total = ix86_vec_cost (mode, cost->addss, true);
      return false;

    case FLOAT_TRUNCATE:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = cost->fadd;
      else
	*total = ix86_vec_cost (mode, cost->addss, true);
      return false;

    case ABS:
      /* SSE requires memory load for the constant operand. It may make
	 sense to account for this.  Of course the constant operand may or
	 may not be reused.  */
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	*total = cost->sse_op;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->sse_op, true);
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode,
				mode == SFmode ? cost->sqrtss : cost->sqrtsd,
				true);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = cost->sse_op;
      return true;
    case VEC_MERGE:
      mask = XEXP (x, 2);
      /* This is masked instruction, assume the same cost,
	 as nonmasked variant.  */
      if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
	*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
      else
	*total = cost->sse_op;
      return true;

    default:
      return false;
    }
}

#if TARGET_MACHO
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Extended REX SSE registers.  */
   for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Mask register.  */
   for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
     reg_alloc_order [pos++] = i;

   /* MPX bound registers.  */
   for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */

static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int,
                                         bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qE attribute requires an integer constant argument",
                   name);
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, 0) != 0
               && compare_tree_int (cst, 1) != 0)
        {
          warning (OPT_Wattributes,
                   "argument to %qE attribute is neither zero, nor one",
                   name);
          *no_add_attrs = true;
        }
    }

  return NULL_TREE;
}
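
/* Illustrative use of the attribute validated above (a sketch, not code
   from this file): on 32-bit targets it lets a declaration force whether
   the callee pops the hidden aggregate-return pointer, e.g.

     struct s { int a, b, c; };
     struct s __attribute__((callee_pop_aggregate_return (1))) f (void);

   The argument must be the integer constant 0 or 1, as checked above.  */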
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name, tree, int,
                           bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
        {
          error ("ms_abi and sysv_abi attributes are not compatible");
        }

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
        {
          error ("ms_abi and sysv_abi attributes are not compatible");
        }

      return NULL_TREE;
    }

  return NULL_TREE;
}
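
/* Illustrative use of the attributes validated above (a sketch, not code
   from this file): a declaration may select one calling-convention ABI,
   but never both, e.g.

     int __attribute__((ms_abi))   f (int, int);
     int __attribute__((sysv_abi)) g (int, int);

   Combining ms_abi and sysv_abi on the same type is rejected above.  */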
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name, tree, int,
                              bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
               name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
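
/* Illustrative use of the attributes validated above (a sketch, not code
   from this file): the attribute selects the record layout convention for
   a struct or union type, e.g.

     struct __attribute__((ms_struct)) s { char c; int i : 4; };

   Requesting both ms_struct and gcc_struct on one type is rejected.  */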
static tree
ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
                              bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
static tree
ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
                                                 int, bool *)
{
  return NULL_TREE;
}

static tree
ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
{
  /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet,
     but the function type contains args and return type data.  */
  tree func_type = *node;
  tree return_type = TREE_TYPE (func_type);

  int nargs = 0;
  tree current_arg_type = TYPE_ARG_TYPES (func_type);
  while (current_arg_type
         && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
    {
      if (nargs == 0)
        {
          if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
            error ("interrupt service routine should have a pointer "
                   "as the first argument");
        }
      else if (nargs == 1)
        {
          if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
              || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
            error ("interrupt service routine should have unsigned %s"
                   "int as the second argument",
                   TARGET_64BIT
                   ? (TARGET_X32 ? "long long " : "long ")
                   : "");
        }
      nargs++;
      current_arg_type = TREE_CHAIN (current_arg_type);
    }
  if (!nargs || nargs > 2)
    error ("interrupt service routine can only have a pointer argument "
           "and an optional integer argument");
  if (! VOID_TYPE_P (return_type))
    error ("interrupt service routine can't have non-void return value");

  return NULL_TREE;
}
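
/* Illustrative interrupt-handler prototypes accepted by the checks above
   (a sketch, not code from this file; the struct tag is arbitrary):

     struct intr_frame;
     void __attribute__((interrupt)) isr   (struct intr_frame *frame);
     void __attribute__((interrupt)) fault (struct intr_frame *frame,
                                            unsigned long error);

   i.e. a mandatory pointer, an optional word-mode unsigned integer, and
   a void return type.  */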
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
           && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
        parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
        parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
        {
          regno = CX_REG;
          if (aggr)
            return gen_rtx_MEM (SImode,
                                plus_constant (Pmode, stack_pointer_rtx, 4));
        }
      else
        {
          regno = AX_REG;
          if (aggr)
            {
              regno = DX_REG;
              if (nregs == 1)
                return gen_rtx_MEM (SImode,
                                    plus_constant (Pmode,
                                                   stack_pointer_rtx, 4));
            }
        }
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
                                             aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
        tmp_regno = DX_REG;
      else
        tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* CET is enabled, insert EB instruction.  */
  if ((flag_cf_protection & CF_BRANCH) && TARGET_IBT)
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (delta_rtx, Pmode))
            {
              tmp = gen_rtx_REG (Pmode, tmp_regno);
              emit_move_insn (tmp, delta_rtx);
              delta_rtx = tmp;
            }
        }

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
        this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
          && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
        {
          rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
          emit_move_insn (tmp2, GEN_INT (vcall_offset));
          vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
        }

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
        emit_insn (gen_addsi_1_zext (this_reg,
                                     gen_rtx_REG (ptr_mode,
                                                  REGNO (this_reg)),
                                     vcall_mem));
      else
        ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
          || TARGET_PECOFF)
        ;
      else
        {
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          fnaddr = gen_const_mem (Pmode, tmp);
        }
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
        ;
#if TARGET_MACHO
      else if (TARGET_MACHO)
        {
          fnaddr = machopic_indirect_call_target (DECL_RTL (function));
          fnaddr = XEXP (fnaddr, 0);
        }
#endif /* TARGET_MACHO */
      else
        {
          tmp = gen_rtx_REG (Pmode, CX_REG);
          output_set_got (tmp, NULL_RTX);

          fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
          fnaddr = gen_rtx_CONST (Pmode, fnaddr);
          fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
          fnaddr = gen_const_mem (Pmode, fnaddr);
        }
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
        {
          fnaddr = XEXP (DECL_RTL (function), 0);
          tmp = gen_rtx_MEM (QImode, fnaddr);
          tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
          tmp = emit_call_insn (tmp);
          SIBLING_CALL_P (tmp) = 1;
        }
      else
        emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
        {
          // CM_LARGE_PIC always uses pseudo PIC register which is
          // uninitialized.  Since FUNCTION is local and calling it
          // doesn't go through PLT, we use scratch register %r11 as
          // PIC register and initialize it here.
          pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
          ix86_init_large_pic_reg (tmp_regno);
          fnaddr = legitimize_pic_address (fnaddr,
                                           gen_rtx_REG (Pmode, tmp_regno));
        }

      if (!sibcall_insn_operand (fnaddr, word_mode))
        {
          tmp = gen_rtx_REG (word_mode, tmp_regno);
          if (GET_MODE (fnaddr) != word_mode)
            fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
          emit_move_insn (tmp, fnaddr);
          fnaddr = tmp;
        }

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
  if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree type, int computed)
{
  machine_mode mode;

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  if (TARGET_IAMCU)
    return iamcu_alignment (type, computed);
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
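
/* Consequence of the MIN (32, computed) cap above (a sketch, not code
   from this file): with the default 32-bit ABI a double member is only
   4-byte aligned, so in

     struct s { char c; double d; };

   D lands at offset 4, whereas -malign-double (TARGET_ALIGN_DOUBLE)
   keeps its natural 8-byte alignment.  */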
/* Print call to TARGET to FILE.  */

static void
x86_print_call_or_nop (FILE *file, const char *target)
{
  if (flag_nop_mcount)
    /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
    fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
  else
    fprintf (file, "1:\tcall\t%s\n", target);
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
                                         : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (!TARGET_PECOFF && flag_pic)
        fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
        x86_print_call_or_nop (file, mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
               LPREFIX, labelno);
#endif
      fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
               LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, mcount_name);
    }

  if (flag_record_mcount)
    {
      fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
ix86_min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_MULTI:
          if (GET_CODE (PATTERN (insn)) == ASM_INPUT
              || asm_noperands (PATTERN (insn)) >= 0)
            return 0;
          break;
        case TYPE_OTHER:
        case TYPE_FCMP:
          break;
        default:
          /* Otherwise trust get_attr_length.  */
          return len;
        }

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
        {
          int align = label_to_alignment (insn);
          int max_skip = label_to_max_skip (insn);

          if (max_skip > 15)
            max_skip = 15;
          /* If align > 3, only up to 16 - max_skip - 1 bytes can be
             already in the current 16 byte page, because otherwise
             ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
             bytes to reach 16 byte boundary.  */
          if (align <= 0
              || (align <= 3 && max_skip != (1 << align) - 1))
            max_skip = 0;
          if (dump_file)
            fprintf (dump_file, "Label %i with max_skip %i\n",
                     INSN_UID (insn), max_skip);
          if (max_skip)
            {
              while (nbytes + max_skip >= 16)
                {
                  start = NEXT_INSN (start);
                  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
                      || CALL_P (start))
                    njumps--, isjump = true;
                  else
                    isjump = false;
                  nbytes -= ix86_min_insn_size (start);
                }
            }
          continue;
        }

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_size);
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
          || CALL_P (insn))
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
              || CALL_P (start))
            njumps--, isjump = true;
          else
            isjump = false;
          nbytes -= ix86_min_insn_size (start);
        }
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + ix86_min_insn_size (insn);

          if (dump_file)
            fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
        }
    }
}
#endif
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
          || optimize_bb_for_size_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || LABEL_P (prev))
          break;
      if (prev && LABEL_P (prev))
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              {
                replace = true;
                break;
              }
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((JUMP_P (prev) && any_condjump_p (prev))
                  || CALL_P (prev)))
            replace = true;
          /* Empty functions get branch mispredict even when
             the jump destination is not visible to us.  */
          if (!prev && !optimize_function_for_size_p (cfun))
            replace = true;
        }
      if (replace)
        {
          emit_jump_insn_before (gen_simple_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx_insn *insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
          && ANY_RETURN_P (PATTERN (insn)))
        break;

      if (NONDEBUG_INSN_P (insn)
          && GET_CODE (PATTERN (insn)) != USE
          && GET_CODE (PATTERN (insn)) != CLOBBER)
        {
          insn_count++;
          if (insn_count >= 4)
            return insn_count;
        }
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        {
          min_prev_count = 0;
          break;
        }
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
        {
          if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
            {
              int count = ix86_count_insn_bb (e->src);
              if (count < min_prev_count)
                min_prev_count = count;
              break;
            }
        }
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
        {
          int insn_count = ix86_count_insn (e->src);

          /* Pad short function.  */
          if (insn_count < 4)
            {
              rtx_insn *insn = ret;

              /* Find epilogue.  */
              while (insn
                     && (!NOTE_P (insn)
                         || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
                insn = PREV_INSN (insn);

              if (!insn)
                insn = ret;

              /* Two NOPs count as one instruction.  */
              insn_count = 2 * (4 - insn_count);
              emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
            }
        }
    }
}
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
        if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
          break;
      if (insn == NULL)
        continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
        continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
        if (NOTE_P (next)
            && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
                || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
          insn = next;
        else
          break;

      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Given a register number BASE, the lowest of a group of registers, update
   regsets IN and OUT with the registers that should be avoided in input
   and output operands respectively when trying to avoid generating a modr/m
   byte for -fmitigate-rop.  */

static void
set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
{
  SET_HARD_REG_BIT (out, base);
  SET_HARD_REG_BIT (out, base + 1);
  SET_HARD_REG_BIT (in, base + 2);
  SET_HARD_REG_BIT (in, base + 3);
}
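
/* Rationale (an interpretation, not stated in the original comments): with
   mod=11, a ModR/M byte in the 0xc2..0xcb range spells a return-like opcode
   (ret imm16, ret, lret imm16, lret), so registers whose encoding would
   land in the reg field (BASE, BASE + 1) are risky as outputs, and those
   landing in the r/m field (BASE + 2, BASE + 3) are risky as inputs.  */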
/* Called if -fmitigate-rop is in effect.  Try to rewrite instructions so
   that certain encodings of modr/m bytes do not occur.  */
static void
ix86_mitigate_rop (void)
{
  HARD_REG_SET input_risky;
  HARD_REG_SET output_risky;
  HARD_REG_SET inout_risky;

  CLEAR_HARD_REG_SET (output_risky);
  CLEAR_HARD_REG_SET (input_risky);
  SET_HARD_REG_BIT (output_risky, AX_REG);
  SET_HARD_REG_BIT (output_risky, CX_REG);
  SET_HARD_REG_BIT (input_risky, BX_REG);
  SET_HARD_REG_BIT (input_risky, DX_REG);
  set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
  set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
  set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
  set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
  set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
  set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
  COPY_HARD_REG_SET (inout_risky, input_risky);
  IOR_HARD_REG_SET (inout_risky, output_risky);

  df_note_add_problem ();
  /* Fix up what stack-regs did.  */
  df_insn_rescan_all ();
  df_analyze ();

  regrename_init (true);
  regrename_analyze (NULL);

  auto_vec<du_head_p> cands;

  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;

      if (GET_CODE (PATTERN (insn)) == USE
          || GET_CODE (PATTERN (insn)) == CLOBBER)
        continue;

      extract_insn (insn);

      int opno0, opno1;
      int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
                                          recog_data.n_operands, &opno0,
                                          &opno1);

      if (!ix86_rop_should_change_byte_p (modrm))
        continue;

      insn_rr_info *info = &insn_rr[INSN_UID (insn)];

      /* This happens when regrename has to fail a block.  */
      if (!info->op_info)
        continue;

      if (info->op_info[opno0].n_chains != 0)
        {
          gcc_assert (info->op_info[opno0].n_chains == 1);
          du_head_p op0c;
          op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
          if (op0c->target_data_1 + op0c->target_data_2 == 0
              && !op0c->cannot_rename)
            cands.safe_push (op0c);

          op0c->target_data_1++;
        }
      if (info->op_info[opno1].n_chains != 0)
        {
          gcc_assert (info->op_info[opno1].n_chains == 1);
          du_head_p op1c;
          op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
          if (op1c->target_data_1 + op1c->target_data_2 == 0
              && !op1c->cannot_rename)
            cands.safe_push (op1c);

          op1c->target_data_2++;
        }
    }

  int i;
  du_head_p head;
  FOR_EACH_VEC_ELT (cands, i, head)
    {
      int old_reg, best_reg;
      HARD_REG_SET unavailable;

      CLEAR_HARD_REG_SET (unavailable);
      if (head->target_data_1)
        IOR_HARD_REG_SET (unavailable, output_risky);
      if (head->target_data_2)
        IOR_HARD_REG_SET (unavailable, input_risky);

      int n_uses;
      reg_class superclass = regrename_find_superclass (head, &n_uses,
                                                        &unavailable);
      old_reg = head->regno;
      best_reg = find_rename_reg (head, superclass, &unavailable,
                                  old_reg, false);
      bool ok = regrename_do_replace (head, best_reg);
      gcc_assert (ok);
      if (dump_file)
        fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
                 reg_names[best_reg], reg_class_names[superclass]);
    }

  regrename_finish ();

  df_analyze ();

  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);

      FOR_BB_INSNS_REVERSE (bb, insn)
        {
          if (!NONDEBUG_INSN_P (insn))
            continue;

          df_simulate_one_insn_backwards (bb, insn, &live);

          if (GET_CODE (PATTERN (insn)) == USE
              || GET_CODE (PATTERN (insn)) == CLOBBER)
            continue;

          extract_insn (insn);
          constrain_operands_cached (insn, reload_completed);
          int opno0, opno1;
          int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
                                              recog_data.n_operands, &opno0,
                                              &opno1);
          if (modrm < 0
              || !ix86_rop_should_change_byte_p (modrm)
              || opno0 == opno1)
            continue;

          rtx oldreg = recog_data.operand[opno1];
          preprocess_constraints (insn);
          const operand_alternative *alt = which_op_alt ();

          int i;
          for (i = 0; i < recog_data.n_operands; i++)
            if (i != opno1
                && alt[i].earlyclobber
                && reg_overlap_mentioned_p (recog_data.operand[i],
                                            oldreg))
              break;

          if (i < recog_data.n_operands)
            continue;

          if (dump_file)
            fprintf (dump_file,
                     "attempting to fix modrm byte in insn %d:"
                     " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
                     reg_class_names[alt[opno1].cl]);

          HARD_REG_SET unavailable;
          REG_SET_TO_HARD_REG_SET (unavailable, &live);
          SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
          IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
          IOR_HARD_REG_SET (unavailable, fixed_reg_set);
          IOR_HARD_REG_SET (unavailable, output_risky);
          IOR_COMPL_HARD_REG_SET (unavailable,
                                  reg_class_contents[alt[opno1].cl]);

          for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
            if (!TEST_HARD_REG_BIT (unavailable, i))
              break;
          if (i == FIRST_PSEUDO_REGISTER)
            {
              if (dump_file)
                fprintf (dump_file, ", none available\n");
              continue;
            }
          if (dump_file)
            fprintf (dump_file, " -> %d\n", i);
          rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
          validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
          insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
        }
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (flag_mitigate_rop)
    ix86_mitigate_rop ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
        ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
        ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
        ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx_insn *insn)
{
  int i;

  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
        && !QI_REGNO_P (REGNO (recog_data.operand[i])))
      return true;
  return false;
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
    {
      const_rtx x = *iter;
      if (REG_P (x)
          && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
        return true;
    }
  return false;
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case E_DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case E_SImode:
    case E_HImode:
    case E_QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
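
/* Worked example for the swap above (a sketch, not code from this file):
   "addl $-4, %eax" and "subl $4, %eax" are equivalent, and the positive
   form reads better, so -4 is negated.  128 is the exception in the other
   direction: +128 does not fit a sign-extended imm8 but -128 does, so
   "addl $128" becomes "subl $-128", saving three bytes of encoding.  */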
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
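
/* The expansion above corresponds to this C sketch (illustrative only,
   assuming a 64-bit unsigned input and a double result):

     double u64_to_f (unsigned long long u)
     {
       if ((long long) u >= 0)
         return (double) (long long) u;
       return 2.0 * (double) (long long) ((u >> 1) | (u & 1));
     }

   The (u & 1) term folds the shifted-out bit back in so that rounding the
   halved value once still yields the correctly rounded final result.  */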
static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline machine_mode
get_mode_wider_vector (machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  machine_mode n = GET_MODE_WIDER_MODE (o).require ();
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
   fill target with val via vec_duplicate.  */

static bool
ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
{
  bool ok;
  rtx_insn *insn;
  rtx dup;

  /* First attempt to recognize VAL as-is.  */
  dup = gen_vec_duplicate (mode, val);
  insn = emit_insn (gen_rtx_SET (target, dup));
  if (recog_memoized (insn) < 0)
    {
      rtx_insn *seq;
      machine_mode innermode = GET_MODE_INNER (mode);
      rtx reg;

      /* If that fails, force VAL into a register.  */

      start_sequence ();
      reg = force_reg (innermode, val);
      if (GET_MODE (reg) != innermode)
        reg = gen_lowpart (innermode, reg);
      SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
      seq = get_insns ();
      end_sequence ();
      if (seq)
        emit_insn_before (seq, insn);

      ok = recog_memoized (insn) >= 0;
      gcc_assert (ok);
    }
  return true;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
                                   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case E_V2SImode:
    case E_V2SFmode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case E_V4DFmode:
    case E_V4DImode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V16SFmode:
    case E_V8DFmode:
      return ix86_vector_duplicate_value (mode, target, val);

    case E_V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          rtx x;

          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (target, x));
          return true;
        }
      goto widen;

    case E_V8QImode:
      if (!mmx_ok)
        return false;
      goto widen;

    case E_V8HImode:
      if (TARGET_AVX2)
        return ix86_vector_duplicate_value (mode, target, val);

      if (TARGET_SSE2)
        {
          struct expand_vec_perm_d dperm;
          rtx tmp1, tmp2;

        permute:
          memset (&dperm, 0, sizeof (dperm));
          dperm.target = target;
          dperm.vmode = mode;
          dperm.nelt = GET_MODE_NUNITS (mode);
          dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
          dperm.one_operand_p = true;

          /* Extend to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));

          /* Insert the SImode value as low element of a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
          emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));

          ok = (expand_vec_perm_1 (&dperm)
                || expand_vec_perm_broadcast_1 (&dperm));
          gcc_assert (ok);
          return ok;
        }
      goto widen;

    case E_V16QImode:
      if (TARGET_AVX2)
        return ix86_vector_duplicate_value (mode, target, val);

      if (TARGET_SSE2)
        goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
        machine_mode smode, wsmode, wvmode;
        rtx x;

        smode = GET_MODE_INNER (mode);
        wvmode = get_mode_wider_vector (mode);
        wsmode = GET_MODE_INNER (wvmode);

        val = convert_modes (wsmode, smode, val, true);
        x = expand_simple_binop (wsmode, ASHIFT, val,
                                 GEN_INT (GET_MODE_BITSIZE (smode)),
                                 NULL_RTX, 1, OPTAB_LIB_WIDEN);
        val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

        x = gen_reg_rtx (wvmode);
        ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
        gcc_assert (ok);
        emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
        return ok;
      }

    case E_V16HImode:
    case E_V32QImode:
      if (TARGET_AVX2)
        return ix86_vector_duplicate_value (mode, target, val);
      else
        {
          machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
          rtx x = gen_reg_rtx (hvmode);

          ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
          gcc_assert (ok);

          x = gen_rtx_VEC_CONCAT (mode, x, x);
          emit_insn (gen_rtx_SET (target, x));
        }
      return true;

    case E_V64QImode:
    case E_V32HImode:
      if (TARGET_AVX512BW)
        return ix86_vector_duplicate_value (mode, target, val);
      else
        {
          machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
          rtx x = gen_reg_rtx (hvmode);

          ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
          gcc_assert (ok);

          x = gen_rtx_VEC_CONCAT (mode, x, x);
          emit_insn (gen_rtx_SET (target, x));
        }
      return true;

    default:
      return false;
    }
}
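
/* Worked example of the "widen" strategy above (illustrative only):
   splatting a QImode value C into V8QImode without a native byte
   broadcast first forms the HImode value (C << 8) | C, recursively
   splats that into V4HImode, and then reinterprets the V4HImode
   result as V8QImode via gen_lowpart.  */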
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
                                     rtx target, rtx var, int one_var)
{
  machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case E_V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
         element is zero and inter-unit moves are OK, we use movq
         instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
                        && !(TARGET_INTER_UNIT_MOVES_TO_VEC
                             && one_var == 0));
      break;
    case E_V16QImode:
    case E_V4SImode:
    case E_V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case E_V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case E_V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case E_V32QImode:
    case E_V16HImode:
    case E_V8SImode:
    case E_V8SFmode:
    case E_V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case E_V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case E_V2SFmode:
    case E_V2SImode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V2DImode:
      if (one_var != 0)
        return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (target, x));
      return true;

    case E_V4SFmode:
    case E_V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (new_target, x));
      if (one_var != 0)
        {
          /* We need to shuffle the value to the correct position, so
             create a new pseudo to store the intermediate result.  */

          /* With SSE2, we can use the integer shuffle insns.  */
          if (mode != V4SFmode && TARGET_SSE2)
            {
              emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                            const1_rtx,
                                            GEN_INT (one_var == 1 ? 0 : 1),
                                            GEN_INT (one_var == 2 ? 0 : 1),
                                            GEN_INT (one_var == 3 ? 0 : 1)));
              if (target != new_target)
                emit_move_insn (target, new_target);
              return true;
            }

          /* Otherwise convert the intermediate result to V4SFmode and
             use the SSE1 shuffle instructions.  */
          if (mode != V4SFmode)
            {
              tmp = gen_reg_rtx (V4SFmode);
              emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
            }
          else
            tmp = new_target;

          emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
                                          const1_rtx,
                                          GEN_INT (one_var == 1 ? 0 : 1),
                                          GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                          GEN_INT (one_var == 3 ? 0+4 : 1+4)));

          if (mode != V4SFmode)
            emit_move_insn (target, gen_lowpart (V4SImode, tmp));
          else if (tmp != target)
            emit_move_insn (target, tmp);
        }
      else if (target != new_target)
        emit_move_insn (target, new_target);
      return true;

    case E_V8HImode:
    case E_V16QImode:
      vsimode = V4SImode;
      goto widen;
    case E_V4HImode:
    case E_V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
        return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
                                                var, one_var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case E_V2DFmode:
    case E_V2DImode:
    case E_V2SFmode:
    case E_V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case E_V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
        return false;
      /* FALLTHRU */
    case E_V4DFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V16HImode:
    case E_V32QImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V8HImode:
    case E_V4HImode:
      break;

    case E_V16QImode:
      if (TARGET_SSE4_1)
        break;
      wmode = V8HImode;
      goto widen;
    case E_V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (machine_mode mode,
                                rtx target, rtx *ops, int n)
{
  machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
  rtx first[16], second[8], third[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
        {
        case E_V16SImode: cmode = V8SImode; break;
        case E_V16SFmode: cmode = V8SFmode; break;
        case E_V8DImode:  cmode = V4DImode; break;
        case E_V8DFmode:  cmode = V4DFmode; break;
        case E_V8SImode:  cmode = V4SImode; break;
        case E_V8SFmode:  cmode = V4SFmode; break;
        case E_V4DImode:  cmode = V2DImode; break;
        case E_V4DFmode:  cmode = V2DFmode; break;
        case E_V4SImode:  cmode = V2SImode; break;
        case E_V4SFmode:  cmode = V2SFmode; break;
        case E_V2DImode:  cmode = DImode;   break;
        case E_V2SImode:  cmode = SImode;   break;
        case E_V2DFmode:  cmode = DFmode;   break;
        case E_V2SFmode:  cmode = SFmode;   break;
        default:
          gcc_unreachable ();
        }

      if (!register_operand (ops[1], cmode))
        ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
        ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
                                                          ops[1])));
      break;

    case 4:
      switch (mode)
        {
        case E_V4DImode: cmode = V2DImode; break;
        case E_V4DFmode: cmode = V2DFmode; break;
        case E_V4SImode: cmode = V2SImode; break;
        case E_V4SFmode: cmode = V2SFmode; break;
        default:
          gcc_unreachable ();
        }
      goto half;

    case 8:
      switch (mode)
        {
        case E_V8DImode: cmode = V2DImode; hmode = V4DImode; break;
        case E_V8DFmode: cmode = V2DFmode; hmode = V4DFmode; break;
        case E_V8SImode: cmode = V2SImode; hmode = V4SImode; break;
        case E_V8SFmode: cmode = V2SFmode; hmode = V4SFmode; break;
        default:
          gcc_unreachable ();
        }
      goto half;

    case 16:
      switch (mode)
        {
        case E_V16SImode:
          cmode = V2SImode; hmode = V4SImode; gmode = V8SImode; break;
        case E_V16SFmode:
          cmode = V2SFmode; hmode = V4SFmode; gmode = V8SFmode; break;
        default:
          gcc_unreachable ();
        }
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
        {
          first[j] = gen_reg_rtx (cmode);
          v = gen_rtvec (2, ops[i - 1], ops[i]);
          ix86_expand_vector_init (false, first[j],
                                   gen_rtx_PARALLEL (cmode, v));
        }

      n >>= 1;
      if (n > 4)
        {
          gcc_assert (hmode != VOIDmode);
          gcc_assert (gmode != VOIDmode);
          for (i = j = 0; i < n; i += 2, j++)
            {
              second[j] = gen_reg_rtx (hmode);
              ix86_expand_vector_init_concat (hmode, second[j],
                                              &first[i], 2);
            }
          n >>= 1;
          for (i = j = 0; i < n; i += 2, j++)
            {
              third[j] = gen_reg_rtx (gmode);
              ix86_expand_vector_init_concat (gmode, third[j],
                                              &second[i], 2);
            }
          n >>= 1;
          ix86_expand_vector_init_concat (mode, target, third, n);
        }
      else if (n > 2)
        {
          gcc_assert (hmode != VOIDmode);
          for (i = j = 0; i < n; i += 2, j++)
            {
              second[j] = gen_reg_rtx (hmode);
              ix86_expand_vector_init_concat (hmode, second[j],
                                              &first[i], 2);
            }
          n >>= 1;
          ix86_expand_vector_init_concat (mode, target, second, n);
        }
      else
        ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (machine_mode mode,
                                    rtx target, rtx *ops, int n)
{
  machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case E_V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case E_V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
                               gen_rtx_VEC_DUPLICATE (V4SImode,
                                                      op0),
                               CONST0_RTX (V4SImode),
                               const1_rtx);
      emit_insn (gen_rtx_SET (op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
                                force_reg (inner_mode,
                                           ops[i + i + 1]),
                                const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case E_V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
        {
          op0 = gen_reg_rtx (second_imode);
          emit_insn (gen_interleave_second_low (op0, ops[i],
                                                ops[i + 1]));

          /* Cast the SECOND_IMODE vector to the THIRD_IMODE
             vector.  */
          ops[j] = gen_reg_rtx (third_imode);
          emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
        }
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case E_V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
                                            ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
         mode.  */
      emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
                                 rtx target, rtx vals)
{
  rtx ops[64], op0, op1, op2, op3, op4, op5;
  machine_mode half_mode = VOIDmode;
  machine_mode quarter_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case E_V2SFmode:
    case E_V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case E_V16SImode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V8DImode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case E_V2TImode:
      for (i = 0; i < 2; i++)
        ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
      op0 = gen_reg_rtx (V4DImode);
      ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
      emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
      return;

    case E_V4TImode:
      for (i = 0; i < 4; i++)
        ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
      ops[4] = gen_reg_rtx (V4DImode);
      ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
      ops[5] = gen_reg_rtx (V4DImode);
      ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
      op0 = gen_reg_rtx (V8DImode);
      ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
      emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
      return;

    case E_V32QImode:
      half_mode = V16QImode;
      goto half;

    case E_V16HImode:
      half_mode = V8HImode;
half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
                                          n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
                                          &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case E_V64QImode:
      quarter_mode = V16QImode;
      half_mode = V32QImode;
      goto quarter;

    case E_V32HImode:
      quarter_mode = V8HImode;
      half_mode = V16HImode;
quarter:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (quarter_mode);
      op1 = gen_reg_rtx (quarter_mode);
      op2 = gen_reg_rtx (quarter_mode);
      op3 = gen_reg_rtx (quarter_mode);
      op4 = gen_reg_rtx (half_mode);
      op5 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
                                          n >> 3);
      ix86_expand_vector_init_interleave (quarter_mode, op1,
                                          &ops[n >> 2], n >> 3);
      ix86_expand_vector_init_interleave (quarter_mode, op2,
                                          &ops[n >> 1], n >> 3);
      ix86_expand_vector_init_interleave (quarter_mode, op3,
                                          &ops[(n >> 1) | (n >> 2)], n >> 3);
      emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
      emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
      return;

    case E_V16QImode:
      if (!TARGET_SSE4_1)
        break;
      /* FALLTHRU */

    case E_V8HImode:
      if (!TARGET_SSE2)
        break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
         move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
        break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case E_V4HImode:
    case E_V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_clobber (tmp);
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          gcc_assert (word_mode == SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  /* Handle first initialization from vector elts.  */
  if (n_elts != XVECLEN (vals, 0))
    {
      rtx subtarget = target;
      x = XVECEXP (vals, 0, 0);
      gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
      if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
        {
          rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
          if (inner_mode == QImode || inner_mode == HImode)
            {
              unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
              mode = mode_for_vector (SImode, n_bits / 4).require ();
              inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
              ops[0] = gen_lowpart (inner_mode, ops[0]);
              ops[1] = gen_lowpart (inner_mode, ops[1]);
              subtarget = gen_reg_rtx (mode);
            }
          ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
          if (subtarget != target)
            emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
          return;
        }
      gcc_unreachable ();
    }

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x)
            || CONST_DOUBLE_P (x)
            || CONST_FIXED_P (x)))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
          && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, one_var),
                                                  one_var))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals,
                                           one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
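
/* Decision cascade above, summarized with examples (illustrative only,
   for a four-element vector with variable elements x, y, z, w):
     { 1, 2, 3, 4 } -> constant pool load;
     { x, x, x, x } -> ix86_expand_vector_init_duplicate (broadcast);
     { x, 0, 0, 0 } -> ix86_expand_vector_init_one_nonzero;
     { x, 1, 2, 3 } -> ix86_expand_vector_init_one_var;
     { x, y, z, w } -> ix86_expand_vector_init_general.  */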
42549 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
42551 machine_mode mode
= GET_MODE (target
);
42552 machine_mode inner_mode
= GET_MODE_INNER (mode
);
42553 machine_mode half_mode
;
42554 bool use_vec_merge
= false;
42556 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
42558 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
42559 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
42560 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
42561 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
42562 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
42563 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
42565 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
42567 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
42568 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
42569 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
42570 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
42571 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
42572 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
42575 machine_mode mmode
= VOIDmode
;
42576 rtx (*gen_blendm
) (rtx
, rtx
, rtx
, rtx
);
42584 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
      ix86_expand_vector_extract (true, tmp, target, 1 - elt);
      tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (target, tmp));

      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (target, tmp));

      /* For the two element vectors, we implement a VEC_CONCAT with
	 the extraction of the other element.  */
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

      op0 = val, op1 = tmp;
      op0 = tmp, op1 = val;

      tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
      emit_insn (gen_rtx_SET (target, tmp));

      use_vec_merge = TARGET_SSE4_1;

      use_vec_merge = true;

      /* tmp = target = A B C D */
      tmp = copy_to_reg (target);
      /* target = A A B B */
      emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
      /* target = X A B B */
      ix86_expand_vector_set (false, target, val, 0);
      /* target = A X C D */
      emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
				      const1_rtx, const0_rtx,
				      GEN_INT (2+4), GEN_INT (3+4)));

      /* tmp = target = A B C D */
      tmp = copy_to_reg (target);
      /* tmp = X B C D */
      ix86_expand_vector_set (false, tmp, val, 0);
      /* target = A B X D */
      emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
				      const0_rtx, const1_rtx,
				      GEN_INT (0+4), GEN_INT (3+4)));

      /* tmp = target = A B C D */
      tmp = copy_to_reg (target);
      /* tmp = X B C D */
      ix86_expand_vector_set (false, tmp, val, 0);
      /* target = A B X D */
      emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
				      const0_rtx, const1_rtx,
				      GEN_INT (2+4), GEN_INT (0+4)));

      gcc_unreachable ();

      use_vec_merge = TARGET_SSE4_1;

      /* Element 0 handled by vec_merge below.  */

      use_vec_merge = true;

      /* With SSE2, use integer shuffles to swap element 0 and ELT,
	 store into element 0, then shuffle them back.  */
      order[0] = GEN_INT (elt);
      order[1] = const1_rtx;
      order[2] = const2_rtx;
      order[3] = GEN_INT (3);
      order[elt] = const0_rtx;

      emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
				    order[1], order[2], order[3]));

      ix86_expand_vector_set (false, target, val, 0);

      emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
				    order[1], order[2], order[3]));

      /* For SSE1, we have to reuse the V4SF code.  */
      rtx t = gen_reg_rtx (V4SFmode);
      emit_move_insn (t, gen_lowpart (V4SFmode, target));
      ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
      emit_move_insn (target, gen_lowpart (mode, t));

      use_vec_merge = TARGET_SSE2;

      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      use_vec_merge = TARGET_SSE4_1;

      half_mode = V16QImode;
      half_mode = V8HImode;
      half_mode = V4SImode;
      half_mode = V2DImode;
      half_mode = V4SFmode;
      half_mode = V2DFmode;

      /* Compute offset.  */
      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      emit_insn (gen_insert[j][i] (target, target, tmp));

      if (TARGET_AVX512F)
	gen_blendm = gen_avx512f_blendmv8df;
      if (TARGET_AVX512F)
	gen_blendm = gen_avx512f_blendmv8di;
      if (TARGET_AVX512F)
	gen_blendm = gen_avx512f_blendmv16sf;
      if (TARGET_AVX512F)
	gen_blendm = gen_avx512f_blendmv16si;
      if (TARGET_AVX512F && TARGET_AVX512BW)
	gen_blendm = gen_avx512bw_blendmv32hi;
      if (TARGET_AVX512F && TARGET_AVX512BW)
	gen_blendm = gen_avx512bw_blendmv64qi;

  if (mmode != VOIDmode)
    {
      tmp = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
      /* The avx512*_blendm<mode> expanders have different operand order
	 from VEC_MERGE.  In VEC_MERGE, the first input operand is used for
	 elements where the mask is set and second input operand otherwise,
	 in {sse,avx}*_*blend* the first input operand is used for elements
	 where the mask is clear and second input operand otherwise.  */
      emit_insn (gen_blendm (target, target, tmp,
			     force_reg (mmode,
					gen_int_mode (1 << elt, mmode))));
    }
  else if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode,
			    elt * GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
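/* Illustrative note (not compiler code): the final fallback above is the
   fully generic one -- spill the vector to a stack temporary, overwrite
   one element through memory, and reload.  A hedged C sketch of the same
   idea, assuming a hypothetical 4-element int vector:

     int spill[4];
     memcpy (spill, &vec, sizeof spill);   // emit_move_insn (mem, target)
     spill[elt] = val;                     // emit_move_insn (tmp, val)
     memcpy (&vec, spill, sizeof spill);   // emit_move_insn (target, mem)

   Everything before it merely tries to stay in registers, via vec_merge,
   blendm, or shuffles, when the active ISA allows it.  */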
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  machine_mode mode = GET_MODE (vec);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

      use_vec_extr = true;

      use_vec_extr = TARGET_SSE4_1;

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));

	  gcc_unreachable ();

      use_vec_extr = true;

      use_vec_extr = TARGET_SSE4_1;

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					GEN_INT (elt), GEN_INT (elt),
					GEN_INT (elt), GEN_INT (elt)));

	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));

	  gcc_unreachable ();

      use_vec_extr = true;

      /* For SSE1, we have to reuse the V4SF code.  */
      ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				  gen_lowpart (V4SFmode, vec), elt);

      use_vec_extr = TARGET_SSE2;

      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      use_vec_extr = TARGET_SSE4_1;

	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);

	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);

	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);

	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);

	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);

	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);

      if (TARGET_AVX512BW)
	{
	  tmp = gen_reg_rtx (V16HImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	}

      if (TARGET_AVX512BW)
	{
	  tmp = gen_reg_rtx (V32QImode);
	  if (elt < 32)
	    emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 31);
	}

	  tmp = gen_reg_rtx (V8SFmode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);

	  tmp = gen_reg_rtx (V4DFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);

	  tmp = gen_reg_rtx (V8SImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);

	  tmp = gen_reg_rtx (V4DImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);

      /* ??? Could extract the appropriate HImode element and shift.  */

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode,
			    elt * GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem, d = dest;
  switch (GET_MODE (src))
    {
      tem = gen_sse_movhlps (dest, src, src);
      tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				 GEN_INT (1 + 4), GEN_INT (1 + 4));
      tem = gen_vec_interleave_highv2df (dest, src, src);

      d = gen_reg_rtx (V1TImode);
      tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
				GEN_INT (i / 2));

      tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      tem = gen_avx_shufps256 (dest, src, src,
			       GEN_INT (i == 128 ? 2 + (3 << 2) : 1));

      tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);

	  if (GET_MODE (dest) != V4DImode)
	    d = gen_reg_rtx (V4DImode);
	  tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
				   gen_lowpart (V4DImode, src),
				   const1_rtx);

	  d = gen_reg_rtx (V2TImode);
	  tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
				    GEN_INT (i / 2));

	tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
					gen_lowpart (V16SImode, src),
					gen_lowpart (V16SImode, src),
					GEN_INT (0x4 + (i == 512 ? 4 : 0)),
					GEN_INT (0x5 + (i == 512 ? 4 : 0)),
					GEN_INT (0x6 + (i == 512 ? 4 : 0)),
					GEN_INT (0x7 + (i == 512 ? 4 : 0)),
					GEN_INT (0xC), GEN_INT (0xD),
					GEN_INT (0xE), GEN_INT (0xF),
					GEN_INT (0x10), GEN_INT (0x11),
					GEN_INT (0x12), GEN_INT (0x13),
					GEN_INT (0x14), GEN_INT (0x15),
					GEN_INT (0x16), GEN_INT (0x17));
	tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
				    gen_lowpart (V16SImode, src),
				    GEN_INT (i == 128 ? 0x2 : 0x1),
				    GEN_INT (i == 128 ? 0x6 : 0x5),
				    GEN_INT (i == 128 ? 0xA : 0x9),
				    GEN_INT (i == 128 ? 0xE : 0xD),

      gcc_unreachable ();
    }
  emit_insn (tem);
  if (d != dest)
    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_UNIT_BITSIZE (mode);
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
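/* Illustrative note (not compiler code): the loop above is the usual
   log2 halving reduction.  A hedged sketch for a 4-element vector and
   FN = addition:

     v = [a b c d]
     half = reduc_half (v, 128)   ->  [c d . .]
     v = v + half                 ->  [a+c b+d . .]
     half = reduc_half (v, 64)    ->  [b+d . . .]
     v = v + half                 ->  [a+b+c+d . . .]

   so element 0 of the final DST holds the full reduction.  */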
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (scalar_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Target hook for c_mode_for_suffix.  */
static machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
		    vec<const char *> &constraints,
		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
  clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
  SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);

  bool saw_asm_flag = false;

  start_sequence ();
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      if (strncmp (con, "=@cc", 4) != 0)
	continue;
      con += 4;
      if (strchr (con, ',') != NULL)
	{
	  error ("alternatives not allowed in asm flag output");
	  continue;
	}

      bool invert = false;
      if (*con == 'n')
	invert = true, con++;

      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      switch (*con)
	{
	case 'a':
	  if (con[1] == 0)
	    mode = CCAmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCCmode, code = NE;
	  break;
	case 'b':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCAmode, code = NE;
	  break;
	case 'c':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  break;
	case 'e':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	case 'g':
	  if (con[1] == 0)
	    mode = CCGCmode, code = GT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = GE;
	  break;
	case 'l':
	  if (con[1] == 0)
	    mode = CCGCmode, code = LT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = LE;
	  break;
	case 'o':
	  if (con[1] == 0)
	    mode = CCOmode, code = EQ;
	  break;
	case 'p':
	  if (con[1] == 0)
	    mode = CCPmode, code = EQ;
	  break;
	case 's':
	  if (con[1] == 0)
	    mode = CCSmode, code = EQ;
	  break;
	case 'z':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	}
      if (code == UNKNOWN)
	{
	  error ("unknown asm flag output %qs", constraints[i]);
	  continue;
	}
      if (invert)
	code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=Bf";
	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X";
	  outputs[i] = gen_rtx_SCRATCH (SImode);
	}

      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error ("invalid type for asm flag output");
	  continue;
	}

      if (dest_mode == DImode && !TARGET_64BIT)
	dest_mode = SImode;

      if (dest_mode != QImode)
	{
	  rtx destqi = gen_reg_rtx (QImode);
	  emit_insn (gen_rtx_SET (destqi, x));

	  if (TARGET_ZERO_EXTEND_WITH_AND
	      && optimize_function_for_speed_p (cfun))
	    {
	      x = force_reg (dest_mode, const0_rtx);

	      emit_insn (gen_movstrictqi
			 (gen_lowpart (QImode, x), destqi));
	    }
	  else
	    x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
	}

      if (dest_mode != GET_MODE (dest))
	{
	  rtx tmp = gen_reg_rtx (SImode);

	  emit_insn (gen_rtx_SET (tmp, x));
	  emit_insn (gen_zero_extendsidi2 (dest, tmp));
	}
      else
	emit_insn (gen_rtx_SET (dest, x));
    }
  rtx_insn *seq = get_insns ();
  end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
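/* Illustrative note (not compiler code): the constraint strings parsed
   above are GCC's asm flag outputs.  A hedged user-level example for the
   "=@ccz" case handled by the 'z' branch (CCZmode/EQ):

     int is_zero;
     asm ("testl %1, %1" : "=@ccz" (is_zero) : "r" (x));

   The loop rewrites the first such output into the flags register with
   constraint "=Bf" and emits a QImode setcc (plus zero extension when
   the C object is wider than QImode) after the asm.  */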
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, machine_mode mode)
{
  return (mode == CCFPmode
	  ? reverse_condition_maybe_unordered (code)
	  : reverse_condition (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx_insn *insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx_code_label *label1 = gen_label_rtx ();
  rtx_code_label *label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
		     const_double_from_real_value (
		       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353",
					XFmode),
		       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1),
				  label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
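/* Illustrative note (not compiler code): the expansion above relies on the
   x87 identities fyl2xp1(y, x) = y * log2(1 + x) and fyl2x(y, x) =
   y * log2(x); with y = ln(2) loaded by fldln2, both branches compute
   ln(1 + x).  The threshold 0.29289... is 1 - sqrt(2)/2: below it, 1 + x
   stays close enough to 1 that fyl2xp1's extra precision pays off.
   A hedged C sketch, where log2_1p is a hypothetical helper standing in
   for fyl2xp1:

     double log1p_sketch (double x)
     {
       if (fabs (x) < 1.0 - M_SQRT2 / 2.0)
	 return M_LN2 * log2_1p (x);      // fyl2xp1 path
       else
	 return M_LN2 * log2 (1.0 + x);   // fyl2x path
     }
   */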
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  machine_mode inmode = GET_MODE (op1);
  machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx_code_label *jump_label = gen_label_rtx ();
  rtx_insn *insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case E_SFmode:
      gen_abs = gen_abssf2;
      break;
    case E_DFmode:
      gen_abs = gen_absdf2;
      break;
    case E_XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case E_SFmode:
      gen_neg = gen_negsf2;
      break;
    case E_DFmode:
      gen_neg = gen_negdf2;
      break;
    case E_XFmode:
      gen_neg = gen_negxf2;
      break;
    case E_HImode:
      gen_neg = gen_neghi2;
      break;
    case E_SImode:
      gen_neg = gen_negsi2;
      break;
    case E_DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = const_double_from_real_value (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case E_SFmode:
    case E_DFmode:
      {
	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));
      }
      break;
    case E_XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case E_HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case E_SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case E_DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
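/* Illustrative note (not compiler code): a hedged C sketch of the
   round(a) = sgn(a) * floor(fabs(a) + 0.5) identity the expander uses,
   with the fxam sign test standing in for the signbit check:

     double round_sketch (double a)
     {
       double r = floor (fabs (a) + 0.5);
       return signbit (a) ? -r : r;
     }

   Doing the floor on fabs(a) and negating afterwards is what lets one
   frndint/lfloor pattern serve both signs.  */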
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  if (mode == V16SFmode || mode == V8DFmode)
    {
      if (TARGET_AVX512ER)
	{
	  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
						      UNSPEC_RCP28)));
	  /* res = a * x0 */
	  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
	  return;
	}
      else
	emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
						    UNSPEC_RCP14)));
    }
  else
    emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
						UNSPEC_RCP)));

  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
}
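/* Illustrative note (not compiler code): the sequence above is one
   Newton-Raphson step for the reciprocal, x1 = x0 * (2 - b * x0),
   rewritten as (x0 + x0) - b * x0 * x0 so it maps onto mul/add/sub.
   A hedged C sketch, with rcp_estimate a hypothetical ~12-bit estimate:

     float swdiv_sketch (float a, float b)
     {
       float x0 = rcp_estimate (b);
       float x1 = (x0 + x0) - b * x0 * x0;
       return a * x1;
     }

   One step roughly doubles the estimate's accurate bits, which is why
   the rcp28 path can skip the refinement entirely.  */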
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;
  int unspec;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  if (TARGET_AVX512ER && mode == V16SFmode)
    {
      if (recip)
	/* res = rsqrt28(a) estimate */
	emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
						     UNSPEC_RSQRT28)));
      else
	{
	  /* x0 = rsqrt28(a) estimate */
	  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
						      UNSPEC_RSQRT28)));
	  /* res = rcp28(x0) estimate */
	  emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode,
						       gen_rtvec (1, x0),
						       UNSPEC_RCP28)));
	}
      return;
    }

  real_from_integer (&r, VOIDmode, -3, SIGNED);
  mthree = const_double_from_real_value (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = const_double_from_real_value (r, SFmode);
  unspec = UNSPEC_RSQRT;

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
      /* There is no 512-bit rsqrt.  There is however rsqrt14.  */
      if (GET_MODE_SIZE (mode) == 64)
	unspec = UNSPEC_RSQRT14;
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					      unspec)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));
      rtx mask;

      /* Handle masked compare.  */
      if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
	{
	  mask = gen_reg_rtx (HImode);
	  /* Imm value 0x4 corresponds to not-equal comparison.  */
	  emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
	  emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
	}
      else
	{
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
	  emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
	}
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
}
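/* Illustrative note (not compiler code): a hedged C sketch of the rsqrt
   refinement used above, for the recip case, with rsqrt_estimate a
   hypothetical hardware estimate:

     float rsqrt_sketch (float a)
     {
       float x0 = rsqrt_estimate (a);
       float e1 = a * x0 * x0;            // ~1 when x0 is accurate
       return -0.5f * x0 * (e1 - 3.0f);   // = 0.5*x0*(3 - a*x0*x0)
     }

   which is the classical x1 = x0 * (3 - a * x0 * x0) / 2 step with the
   constants negated to match the mul/add pattern emitted.  */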
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case E_TFmode:
      /* __float128 is "g".  */
      return "g";
    case E_XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
	{
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	}

      return t;
    }

  return default_stack_protect_guard ();
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      machine_mode vmode;

      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
}
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (mask, tmp));
    }
  emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx_code_label *
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  bool unordered_compare = ix86_unordered_fp_compare (code);
  rtx_code_label *label;
  rtx tmp, reg;

  if (swap_operands)
    std::swap (op0, op1);

  label = gen_label_rtx ();
  tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
  if (unordered_compare)
    tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
  reg = gen_rtx_REG (CCFPmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (reg, tmp));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    std::swap (op0, op1);

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
		   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
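/* Illustrative note (not compiler code): TWO52 = 2**52 is the classic
   double-precision rounding constant -- once |x| + 2**52 is computed,
   the result has no fraction bits left, so the addition itself rounds x
   to an integer in the current rounding mode.  A hedged C sketch:

     double force_round (double x)     // assumes 0 <= x < 2**52
     {
       const double two52 = 4503599627370496.0;   // 2**52
       volatile double t = x + two52;  // fraction bits rounded away
       return t - two52;
     }

   For SFmode the same trick uses 2**23, matching the mantissa width.  */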
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	return (long)tmp;
   */
  machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
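/* Illustrative note (not compiler code): pred_half above is
   nextafter (0.5, 0.0), i.e. 0.5 minus half an ulp.  Using plain 0.5
   would misround the predecessor of 0.5: x + 0.5 can round up to 1.0
   under round-to-nearest-even even though round(x) is 0.  A hedged C
   check of the boundary case:

     double x = nextafter (0.5, 0.0);   // 0.49999999999999994...
     // x + 0.5 rounds to exactly 1.0, so (long)(x + 0.5) would be 1
     // x + pred_half stays below 1.0, so truncation correctly yields 0
   */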
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  machine_mode fmode = GET_MODE (op1);
  machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, tmp;
  rtx_code_label *label;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint rounding OPERAND1 and storing the result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	two52 = 2**52;
	if (flag_rounding_math)
	  {
	    two52 = copysign (two52, operand1);
	    xa = operand1;
	  }
	xa = xa + two52 - two52;
	return copysign (xa, operand1);
   */
  machine_mode mode = GET_MODE (operand0);
  rtx res, xa, TWO52, two52, mask;
  rtx_code_label *label;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  two52 = TWO52;
  if (flag_rounding_math)
    {
      two52 = gen_reg_rtx (mode);
      ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
      xa = res;
    }

  xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
  machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0,
			     OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, one, res, smask, tmp;
  rtx_code_label *label;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, xi, half, mask;
  rtx_code_label *label;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case E_SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case E_DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
    NULL },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
    NULL },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
    NULL },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
    NULL },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute,
    NULL },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
    NULL },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, true,
    ix86_handle_tm_regparm_attribute, NULL },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true, true, false, ix86_handle_force_align_arg_pointer_attribute,
    NULL },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "shared", 0, 0, true, false, false, false,
    ix86_handle_shared_attribute, NULL },
#endif
  { "ms_struct", 0, 0, false, false, false, false,
    ix86_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    ix86_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL },
  { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute,
    NULL },
  { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
  { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
  { "ms_hook_prologue", 0, 0, true, false, false, false,
    ix86_handle_fndecl_attribute, NULL },
  { "callee_pop_aggregate_return", 1, 1, false, true, true, true,
    ix86_handle_callee_pop_aggregate_return, NULL },
  { "interrupt", 0, 0, false, true, true, false,
    ix86_handle_interrupt_attribute, NULL },
  { "no_caller_saved_registers", 0, 0, false, true, true, false,
    ix86_handle_no_caller_saved_registers_attribute, NULL },
  { "naked", 0, 0, true, false, false, false,
    ix86_handle_fndecl_attribute, NULL },

  /* End element.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype, int)
{
  bool fp = false;
  machine_mode mode = TImode;
  int index;

  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
    }

  switch (type_of_cost)
    {
    case scalar_stmt:
      return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

    case scalar_load:
      /* load/store costs are relative to register move which is 2.  Recompute
	 it to COSTS_N_INSNS so everything has the same base.  */
      return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
			    : ix86_cost->int_load[2]) / 2;

    case scalar_store:
      return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
			    : ix86_cost->int_store[2]) / 2;

    case vector_stmt:
      return ix86_vec_cost (mode,
			    fp ? ix86_cost->addss : ix86_cost->sse_op,
			    true);

    case vector_load:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
			    true);

    case vector_store:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
			    true);

    case vec_to_scalar:
    case scalar_to_vec:
      return ix86_vec_cost (mode, ix86_cost->sse_op, true);

    /* We should have separate costs for unaligned loads and gather/scatter.
       Do that incrementally.  */
    case unaligned_load:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS
			      (ix86_cost->sse_unaligned_load[index]) / 2,
			    true);

    case unaligned_store:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
	index = 2;
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS
			      (ix86_cost->sse_unaligned_store[index]) / 2,
			    true);

    case vector_gather_load:
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS
			      (ix86_cost->gather_static
			       + ix86_cost->gather_per_elt
				 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
			    true);

    case vector_scatter_store:
      return ix86_vec_cost (mode,
			    COSTS_N_INSNS
			      (ix86_cost->scatter_static
			       + ix86_cost->scatter_per_elt
				 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
			    true);

    case cond_branch_taken:
      return ix86_cost->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return ix86_cost->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return ix86_vec_cost (mode,
			    ix86_cost->sse_op, true);

    case vec_construct:
      return ix86_vec_cost (mode, ix86_cost->sse_op, false);

    default:
      gcc_unreachable ();
    }
}
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx_insn *vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
							const0_rtx), x);
  x = gen_rtx_SET (const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
		unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt,
			bool testing_p)
{
  machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
    return false;
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  machine_mode mmode, vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, maskop, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
      && (TARGET_AVX512BW
          || GET_MODE_UNIT_SIZE (vmode) >= 4))
    ;
  else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case E_V8DFmode:
    case E_V16SFmode:
    case E_V4DFmode:
    case E_V8SFmode:
    case E_V2DFmode:
    case E_V4SFmode:
    case E_V8HImode:
    case E_V8SImode:
    case E_V32HImode:
    case E_V64QImode:
    case E_V16SImode:
    case E_V8DImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case E_V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case E_V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case E_V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
         an immediate argument, rather than pblendvb with a vector
         argument.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          {
          use_pblendvb:
            for (i = 0; i < nelt; ++i)
              rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          finish_pblendvb:
            vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
            vperm = force_reg (vmode, vperm);

            if (GET_MODE_SIZE (vmode) == 16)
              emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
            else
              emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
            if (target != d->target)
              emit_move_insn (d->target, gen_lowpart (d->vmode, target));
            return true;
          }

      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_reg_rtx (vmode);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case E_V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
         with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
        if (d->perm[i] + 2 != d->perm[i + 2])
          break;
      if (i < 32)
        {
          /* See if bytes move the same in both lanes.  If yes,
             vpblendw with immediate can be used.  */
          for (i = 0; i < 16; i += 2)
            if (d->perm[i] + 16 != d->perm[i + 16])
              goto use_pblendvb;

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i * 2] >= 32) << i;
          vmode = V16HImode;
          goto do_subreg;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case E_V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          break;
      if (i < 16)
        {
          /* See if words move the same in both lanes.  If not,
             vpblendvb must be used.  */
          for (i = 0; i < 8; i++)
            if (d->perm[i] + 8 != d->perm[i + 8])
              {
                /* Use vpblendvb.  */
                for (i = 0; i < 32; ++i)
                  rperm[i]
                    = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

                vmode = V32QImode;
                nelt = 32;
                target = gen_reg_rtx (vmode);
                op0 = gen_lowpart (vmode, op0);
                op1 = gen_lowpart (vmode, op1);
                goto finish_pblendvb;
              }

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i] >= 16) << i;
          break;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case E_V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  switch (vmode)
    {
    case E_V8DFmode:
    case E_V8DImode:
      mmode = QImode;
      break;
    case E_V16SFmode:
    case E_V16SImode:
      mmode = HImode;
      break;
    case E_V32HImode:
      mmode = SImode;
      break;
    case E_V64QImode:
      mmode = DImode;
      break;
    default:
      mmode = VOIDmode;
    }

  if (mmode != VOIDmode)
    maskop = force_reg (mmode, gen_int_mode (mask, mmode));
  else
    maskop = GEN_INT (mask);

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
  x = gen_rtx_SET (target, x);
  emit_insn (x);
  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}
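
/* Illustrative sketch, not part of the original file: how the
   element-granular blend immediate above is formed.  For nelt == 8 and
   the blend permutation {0, 9, 2, 11, 4, 13, 6, 15}, bit i is set iff
   element i comes from the second operand, giving mask 0xaa.  The helper
   name is hypothetical; exposition only.  */
static unsigned
blend_mask_sketch (const unsigned char *perm, unsigned nelt)
{
  unsigned mask = 0;
  for (unsigned i = 0; i < nelt; ++i)
    /* Index >= nelt means "take this lane from the second operand".  */
    mask |= (unsigned) (perm[i] >= nelt) << i;
  return mask;
}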
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4 - 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
        if (d->perm[i] + j != d->perm[i + j])
          return false;

  return true;
}
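
/* Worked example, not in the original source: with d->vmode ==
   V16QImode and VMODE == V8HImode, chunk == 2, so the permutation
   {2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13} passes the checks
   above -- every chunk starts on an even index and the indices within
   each chunk are consecutive -- and is therefore expressible as the
   V8HImode permutation {1,0,3,2,5,4,7,6}.  */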
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[64];
  machine_mode vmode = V16QImode;
  rtx rperm[64], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
        {
          if (TARGET_AVX2
              && valid_perm_using_mode_p (V2TImode, d))
            {
              if (d->testing_p)
                return true;

              /* Use vperm2i128 insn.  The pattern uses
                 V4DImode instead of V2TImode.  */
              target = d->target;
              if (d->vmode != V4DImode)
                target = gen_reg_rtx (V4DImode);
              op0 = gen_lowpart (V4DImode, d->op0);
              op1 = gen_lowpart (V4DImode, d->op1);
              rperm[0]
                = GEN_INT ((d->perm[0] / (nelt / 2))
                           | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
              emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
              if (target != d->target)
                emit_move_insn (d->target, gen_lowpart (d->vmode, target));
              return true;
            }
          return false;
        }
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
        {
          if (!TARGET_SSSE3)
            return false;
        }
      else if (GET_MODE_SIZE (d->vmode) == 32)
        {
          if (!TARGET_AVX2)
            return false;

          /* V4DImode should be already handled through
             expand_vselect by vpermq instruction.  */
          gcc_assert (d->vmode != V4DImode);

          vmode = V32QImode;
          if (d->vmode == V8SImode
              || d->vmode == V16HImode
              || d->vmode == V32QImode)
            {
              /* First see if vpermq can be used for
                 V8SImode/V16HImode/V32QImode.  */
              if (valid_perm_using_mode_p (V4DImode, d))
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
                  if (d->testing_p)
                    return true;
                  target = gen_reg_rtx (V4DImode);
                  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
                                      perm, 4, false))
                    {
                      emit_move_insn (d->target,
                                      gen_lowpart (d->vmode, target));
                      return true;
                    }
                  return false;
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V8SImode, d))
                vmode = V8SImode;
            }
          /* Or if vpermps can be used.  */
          else if (d->vmode == V8SFmode)
            vmode = V8SImode;

          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes; it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
        }
      else if (GET_MODE_SIZE (d->vmode) == 64)
        {
          if (!TARGET_AVX512BW)
            return false;

          /* If vpermq didn't work, vpshufb won't work either.  */
          if (d->vmode == V8DFmode || d->vmode == V8DImode)
            return false;

          vmode = V64QImode;
          if (d->vmode == V16SImode
              || d->vmode == V32HImode
              || d->vmode == V64QImode)
            {
              /* First see if vpermq can be used for
                 V16SImode/V32HImode/V64QImode.  */
              if (valid_perm_using_mode_p (V8DImode, d))
                {
                  for (i = 0; i < 8; i++)
                    perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
                  if (d->testing_p)
                    return true;
                  target = gen_reg_rtx (V8DImode);
                  if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
                                      perm, 8, false))
                    {
                      emit_move_insn (d->target,
                                      gen_lowpart (d->vmode, target));
                      return true;
                    }
                  return false;
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V16SImode, d))
                vmode = V16SImode;
            }
          /* Or if vpermps can be used.  */
          else if (d->vmode == V16SFmode)
            vmode = V16SImode;
          if (vmode == V64QImode)
            {
              /* vpshufb only works intra lanes; it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 4))
                  return false;
            }
        }
      else
        return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else if (vmode == V16SImode)
    for (i = 0; i < 16; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
  else
    {
      eltsz = GET_MODE_UNIT_SIZE (d->vmode);
      if (!d->one_operand_p)
        mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
        mask = nelt - 1;
      else if (vmode == V64QImode)
        mask = nelt / 4 - 1;
      else
        mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
        {
          unsigned j, e = d->perm[i] & mask;
          for (j = 0; j < eltsz; ++j)
            rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
        }
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
                                gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = d->target;
  if (d->vmode != vmode)
    target = gen_reg_rtx (vmode);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
        emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
        emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V64QImode)
        emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
        emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else if (vmode == V8SImode)
        emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
      else if (vmode == V16SFmode)
        emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
      else if (vmode == V16SImode)
        emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
      else
        gcc_unreachable ();
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }
  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}
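
/* Illustrative sketch, not part of the original file: expanding an
   element-level permutation into the byte-level pshufb control vector,
   mirroring the rperm[i * eltsz + j] = e * eltsz + j loop above.  The
   helper name is hypothetical; exposition only.  */
static void
pshufb_bytes_sketch (unsigned char *bytes, const unsigned char *perm,
                     unsigned nelt, unsigned eltsz, unsigned mask)
{
  for (unsigned i = 0; i < nelt; ++i)
    {
      unsigned e = perm[i] & mask;
      /* Each eltsz-byte element becomes eltsz consecutive byte picks.  */
      for (unsigned j = 0; j < eltsz; ++j)
        bytes[i * eltsz + j] = e * eltsz + j;
    }
}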
/* For V*[QHS]Imode permutations, check if the same permutation
   can't be performed in a 2x, 4x or 8x wider inner mode.  */

static bool
canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
                              struct expand_vec_perm_d *nd)
{
  unsigned i;
  machine_mode mode = VOIDmode;

  switch (d->vmode)
    {
    case E_V16QImode: mode = V8HImode; break;
    case E_V32QImode: mode = V16HImode; break;
    case E_V64QImode: mode = V32HImode; break;
    case E_V8HImode: mode = V4SImode; break;
    case E_V16HImode: mode = V8SImode; break;
    case E_V32HImode: mode = V16SImode; break;
    case E_V4SImode: mode = V2DImode; break;
    case E_V8SImode: mode = V4DImode; break;
    case E_V16SImode: mode = V8DImode; break;
    default: return false;
    }
  for (i = 0; i < d->nelt; i += 2)
    if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
      return false;
  nd->vmode = mode;
  nd->nelt = d->nelt / 2;
  for (i = 0; i < nd->nelt; i++)
    nd->perm[i] = d->perm[2 * i] / 2;
  if (GET_MODE_INNER (mode) != DImode)
    canonicalize_vector_int_perm (nd, nd);
  if (nd != d)
    {
      nd->one_operand_p = d->one_operand_p;
      nd->testing_p = d->testing_p;
      if (d->op0 == d->op1)
        nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
      else
        {
          nd->op0 = gen_lowpart (nd->vmode, d->op0);
          nd->op1 = gen_lowpart (nd->vmode, d->op1);
        }
      if (d->testing_p)
        nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
      else
        nd->target = gen_reg_rtx (nd->vmode);
    }
  return true;
}
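
/* Worked example, not in the original source: the V16QImode permutation
   {2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13} halves into the
   V8HImode permutation {1,0,3,2,5,4,7,6}; the recursion then stops,
   because adjacent pairs such as {1,0} are no longer even-aligned and
   consecutive, so no further widening is possible.  */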
/* Try to expand one-operand permutation with constant mask.  */

static bool
ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
{
  machine_mode mode = GET_MODE (d->op0);
  machine_mode maskmode = mode;
  rtx (*gen) (rtx, rtx, rtx) = NULL;
  rtx target, op0, mask;
  rtx vec[64];

  if (!rtx_equal_p (d->op0, d->op1))
    return false;

  if (!TARGET_AVX512F)
    return false;

  switch (mode)
    {
    case E_V16SImode:
      gen = gen_avx512f_permvarv16si;
      break;
    case E_V16SFmode:
      gen = gen_avx512f_permvarv16sf;
      maskmode = V16SImode;
      break;
    case E_V8DImode:
      gen = gen_avx512f_permvarv8di;
      break;
    case E_V8DFmode:
      gen = gen_avx512f_permvarv8df;
      maskmode = V8DImode;
      break;
    default:
      return false;
    }

  target = d->target;
  op0 = d->op0;
  for (int i = 0; i < d->nelt; ++i)
    vec[i] = GEN_INT (d->perm[i]);
  mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
  emit_insn (gen (target, op0, force_reg (maskmode, mask)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  struct expand_vec_perm_d nd;

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
        {
          nd.perm[i] = d->perm[i] & mask;
          if (nd.perm[i] != i)
            identity_perm = false;
          if (nd.perm[i])
            broadcast_perm = false;
        }

      if (identity_perm)
        {
          if (!d->testing_p)
            emit_move_insn (d->target, d->op0);
          return true;
        }
      else if (broadcast_perm && TARGET_AVX2)
        {
          /* Use vpbroadcast{b,w,d}.  */
          rtx (*gen) (rtx, rtx) = NULL;
          switch (d->vmode)
            {
            case E_V64QImode:
              if (TARGET_AVX512BW)
                gen = gen_avx512bw_vec_dupv64qi_1;
              break;
            case E_V32QImode:
              gen = gen_avx2_pbroadcastv32qi_1;
              break;
            case E_V32HImode:
              if (TARGET_AVX512BW)
                gen = gen_avx512bw_vec_dupv32hi_1;
              break;
            case E_V16HImode:
              gen = gen_avx2_pbroadcastv16hi_1;
              break;
            case E_V16SImode:
              if (TARGET_AVX512F)
                gen = gen_avx512f_vec_dupv16si_1;
              break;
            case E_V8SImode:
              gen = gen_avx2_pbroadcastv8si_1;
              break;
            case E_V16QImode:
              gen = gen_avx2_pbroadcastv16qi;
              break;
            case E_V8HImode:
              gen = gen_avx2_pbroadcastv8hi;
              break;
            case E_V16SFmode:
              if (TARGET_AVX512F)
                gen = gen_avx512f_vec_dupv16sf_1;
              break;
            case E_V8SFmode:
              gen = gen_avx2_vec_dupv8sf_1;
              break;
            case E_V8DFmode:
              if (TARGET_AVX512F)
                gen = gen_avx512f_vec_dupv8df_1;
              break;
            case E_V8DImode:
              if (TARGET_AVX512F)
                gen = gen_avx512f_vec_dupv8di_1;
              break;
            /* For other modes prefer other shuffles this function creates.  */
            default: break;
            }
          if (gen != NULL)
            {
              if (!d->testing_p)
                emit_insn (gen (d->target, d->op0));
              return true;
            }
        }

      if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          nd.perm[i] = d->perm[i] & mask;
          nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
                                  d->testing_p))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              nd.perm[i + 0] = d->perm[i + 0] & mask;
              nd.perm[i + 1] = d->perm[i + 1] & mask;
              nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
              nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }

          if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm,
                                      nelt, d->testing_p))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
                              d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          nd.perm[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
                                  d->testing_p))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  /* Try the AVX2 vpalignr instruction.  */
  if (expand_vec_perm_palignr (d, true))
    return true;

  /* Try the AVX512F vperm{s,d} instructions.  */
  if (ix86_expand_vec_one_operand_perm_avx512 (d))
    return true;

  /* Try the AVX512F vpermt2/vpermi2 instructions.  */
  if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
    return true;

  /* See if we can get the same permutation in different vector integer
     mode.  */
  if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
    {
      if (!d->testing_p)
        emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
      return true;
    }
  return false;
}
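
/* Worked example, not in the original source: for V4SImode with
   d->perm = {0,4,1,5} and one_operand_p, masking with nelt - 1 gives
   nd.perm = {0, 0 + nelt, 1, 1 + nelt} = {0,4,1,5}, which
   expand_vselect_vconcat matches directly against the punpckldq-style
   interleave pattern.  */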
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
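
/* Worked example, not in the original source: for d->perm =
   {2,0,3,1,5,7,4,6}, the pshuflw step uses perm2 = {2,0,3,1,4,5,6,7}
   and the pshufhw step uses perm2 = {0,1,2,3,5,7,4,6}; composing the
   two yields the requested shuffle while each insn stays within its
   own 64-bit half.  */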
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  If SINGLE_INSN_ONLY_P, succeed only if
   the vpalignr instruction itself can perform the requested permutation.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max, minswap, maxswap;
  bool in_order, ok, swap = false;
  rtx shift, target;
  struct expand_vec_perm_d dcopy;

  /* Even with AVX, palignr only operates on 128-bit vectors;
     in AVX2 palignr operates on both 128-bit lanes.  */
  if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
      && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
    return false;

  min = 2 * nelt;
  max = 0;
  minswap = 2 * nelt;
  maxswap = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      unsigned eswap = d->perm[i] ^ nelt;
      if (GET_MODE_SIZE (d->vmode) == 32)
        {
          e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
          eswap = e ^ (nelt / 2);
        }
      if (e < min)
        min = e;
      if (e > max)
        max = e;
      if (eswap < minswap)
        minswap = eswap;
      if (eswap > maxswap)
        maxswap = eswap;
    }
  if (min == 0
      || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
    {
      if (d->one_operand_p
          || minswap == 0
          || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
                                   ? nelt / 2 : nelt))
        return false;
      swap = true;
      min = minswap;
      max = maxswap;
    }

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb for
     128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
     first.  */
  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
    return true;

  dcopy = *d;
  if (swap)
    {
      dcopy.op0 = d->op1;
      dcopy.op1 = d->op0;
      for (i = 0; i < nelt; ++i)
        dcopy.perm[i] ^= nelt;
    }

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = dcopy.perm[i];
      if (GET_MODE_SIZE (d->vmode) == 32
          && e >= nelt
          && (e & (nelt / 2 - 1)) < min)
        e = e - min - (nelt / 2);
      else
        e = e - min;
      if (e != i)
        in_order = false;
      dcopy.perm[i] = e;
    }
  dcopy.one_operand_p = true;

  if (single_insn_only_p && !in_order)
    return false;

  /* For AVX2, test whether we can permute the result in one instruction.  */
  if (d->testing_p)
    {
      if (in_order)
        return true;
      dcopy.op1 = dcopy.op0;
      return expand_vec_perm_1 (&dcopy);
    }

  shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      target = gen_reg_rtx (TImode);
      emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
                                      gen_lowpart (TImode, dcopy.op0),
                                      shift));
    }
  else
    {
      target = gen_reg_rtx (V2TImode);
      emit_insn (gen_avx2_palignrv2ti (target,
                                       gen_lowpart (V2TImode, dcopy.op1),
                                       gen_lowpart (V2TImode, dcopy.op0),
                                       shift));
    }

  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    {
      emit_move_insn (d->target, dcopy.op0);
      return true;
    }

  ok = expand_vec_perm_1 (&dcopy);
  gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);

  return ok;
}
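
/* Worked example, not in the original source: for V16QImode with
   d->perm = {2,3,...,17}, min == 2 and max == 17, so max - min < nelt;
   palignr shifts the concatenated operands down by 2 * 8 bits and the
   adjusted permutation becomes the identity (in_order), so the single
   palignr already implements the whole permutation.  */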
/* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
   the permutation using the SSE4_1 pblendv instruction.  Potentially
   reduces permutation from 2 pshufb and or to 1 pshufb and pblendv.  */

static bool
expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
{
  unsigned i, which, nelt = d->nelt;
  struct expand_vec_perm_d dcopy, dcopy1;
  machine_mode vmode = d->vmode;
  bool ok;

  /* Use the same checks as in expand_vec_perm_blend.  */
  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* Figure out which permutation elements do not stay in their
     respective lanes.  */
  for (i = 0, which = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e != i)
        which |= (e < nelt ? 1 : 2);
    }
  /* We can pblend the part where elements do not stay in their
     respective lanes only when these elements are all in one
     half of a permutation.
     {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not at their respective
     positions but both are >= 8;
     {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not at their
     respective positions and 8 >= 8, but 2 is not.  */
  if (which != 1 && which != 2)
    return false;
  if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
    return true;

  /* First we apply one operand permutation to the part where
     elements do not stay in their respective lanes.  */
  dcopy = *d;
  if (which == 2)
    dcopy.op0 = dcopy.op1 = d->op1;
  else
    dcopy.op0 = dcopy.op1 = d->op0;
  if (!d->testing_p)
    dcopy.target = gen_reg_rtx (vmode);
  dcopy.one_operand_p = true;

  for (i = 0; i < nelt; ++i)
    dcopy.perm[i] = d->perm[i] & (nelt - 1);

  ok = expand_vec_perm_1 (&dcopy);
  if (GET_MODE_SIZE (vmode) != 16 && !ok)
    return false;

  gcc_assert (ok);
  if (d->testing_p)
    return true;

  /* Next we put permuted elements into their positions.  */
  dcopy1 = *d;
  if (which == 2)
    dcopy1.op1 = dcopy.target;
  else
    dcopy1.op0 = dcopy.target;

  for (i = 0; i < nelt; ++i)
    dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);

  ok = expand_vec_perm_blend (&dcopy1);
  gcc_assert (ok);

  return true;
}
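
/* Worked example, not in the original source: continuing the comment
   above, for {0 1 8 3 4 5 9 7} the out-of-place elements 8 and 9 both
   come from op1, so which == 2; dcopy permutes op1 alone with
   {0 1 0 3 4 5 1 7} (each index masked by nelt - 1), and dcopy1 then
   blends that result into lanes 2 and 6 of op0.  */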
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx_insn *seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
        return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
        return false;
      /* For 32-byte modes allow even d->one_operand_p.
         The lack of cross-lane shuffling in some instructions
         might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
         a 3 insn sequence, give up and let it be expanded as
         3 insn sequence.  While that is one insn longer,
         it doesn't need a memory operand and in the common
         case that both interleave low and high permutations
         with the same operands are adjacent needs 4 insns
         for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
        return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= HOST_WIDE_INT_1U << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
         for interleave high.  If the elements are from mis-matched halves, we
         can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
        {
          /* punpckl* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h2 | h4)) == contents)
        {
          /* punpckh* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i * 2;
              remap[i + nelt + nelt2] = i * 2 + 1;
              dremap.perm[i * 2] = i + nelt2;
              dremap.perm[i * 2 + 1] = i + nelt + nelt2;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h1 | h4)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i;
              remap[i + nelt + nelt2] = i + nelt2;
              dremap.perm[i] = i;
              dremap.perm[i + nelt2] = i + nelt + nelt2;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 0;
              dremap.perm[1] = 3;
            }
        }
      else if ((contents & (h2 | h3)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i;
              remap[i + nelt] = i + nelt2;
              dremap.perm[i] = i + nelt2;
              dremap.perm[i + nelt2] = i + nelt;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 1;
              dremap.perm[1] = 2;
            }
        }
      else
        return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
      for (i = 1; i < 8; ++i)
        q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
        if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
          {
            nonzero_halves[nzcnt] = i;
            ++nzcnt;
          }

      if (nzcnt == 1)
        {
          gcc_assert (d->one_operand_p);
          nonzero_halves[1] = nonzero_halves[0];
          same_halves = true;
        }
      else if (d->one_operand_p)
        {
          gcc_assert (nonzero_halves[0] == 0);
          gcc_assert (nonzero_halves[1] == 1);
        }

      if (nzcnt <= 2)
        {
          if (d->perm[0] / nelt2 == nonzero_halves[1])
            {
              /* Attempt to increase the likelihood that dfinal
                 shuffle will be intra-lane.  */
              std::swap (nonzero_halves[0], nonzero_halves[1]);
            }

          /* vperm2f128 or vperm2i128.  */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
              remap[i + nonzero_halves[0] * nelt2] = i;
              dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
              dremap.perm[i] = i + nonzero_halves[0] * nelt2;
            }

          if (d->vmode != V8SFmode
              && d->vmode != V4DFmode
              && d->vmode != V8SImode)
            {
              dremap.vmode = V8SImode;
              dremap.nelt = 8;
              for (i = 0; i < 4; ++i)
                {
                  dremap.perm[i] = i + nonzero_halves[0] * 4;
                  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
                }
            }
        }
      else if (d->one_operand_p)
        return false;
      else if (TARGET_AVX2
               && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
        {
          /* vpunpckl* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              remap[i + nelt2] = i * 2 + nelt2;
              remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
              dremap.perm[i * 2 + nelt2] = i + nelt2;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
            }
        }
      else if (TARGET_AVX2
               && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
        {
          /* vpunpckh* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i + nelt4] = i * 2;
              remap[i + nelt + nelt4] = i * 2 + 1;
              remap[i + nelt2 + nelt4] = i * 2 + nelt2;
              remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i + nelt4;
              dremap.perm[i * 2 + 1] = i + nelt + nelt4;
              dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
            }
        }
      else
        return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
         same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
        {
          gcc_assert (e < nelt2);
          dfinal.perm[i] = e + nelt2;
        }
      else
        dfinal.perm[i] = e;
    }
  if (!d->testing_p)
    {
      dremap.target = gen_reg_rtx (dremap.vmode);
      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
    }
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
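
/* Worked example, not in the original source: for V8HImode with
   d->perm = {0,8,1,9,2,10,3,11}, contents only has bits from h1 | h3
   set, so the punpcklwd-style dremap {0,8,1,9,...} is chosen and the
   final remap collapses to the identity -- the requested permutation
   is the interleave itself.  */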
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
        && (d->vmode == V32QImode || d->vmode == V16HImode)
        && d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
          return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0)
          dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
        dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
        j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
        ;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
        dfinal.perm[i] |= nelt4;
      else
        gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
46395 a vector permutation using two instructions, vperm2f128 resp.
46396 vperm2i128 followed by any single in-lane permutation. */
46399 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
46401 struct expand_vec_perm_d dfirst
, dsecond
;
46402 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
46406 || GET_MODE_SIZE (d
->vmode
) != 32
46407 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
46411 dsecond
.one_operand_p
= false;
46412 dsecond
.testing_p
= true;
46414 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
46415 immediate. For perm < 16 the second permutation uses
46416 d->op0 as first operand, for perm >= 16 it uses d->op1
46417 as first operand. The second operand is the result of
46419 for (perm
= 0; perm
< 32; perm
++)
46421 /* Ignore permutations which do not move anything cross-lane. */
46424 /* The second shuffle for e.g. V4DFmode has
46425 0123 and ABCD operands.
46426 Ignore AB23, as 23 is already in the second lane
46427 of the first operand. */
46428 if ((perm
& 0xc) == (1 << 2)) continue;
46429 /* And 01CD, as 01 is in the first lane of the first
46431 if ((perm
& 3) == 0) continue;
46432 /* And 4567, as then the vperm2[fi]128 doesn't change
46433 anything on the original 4567 second operand. */
46434 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
46438 /* The second shuffle for e.g. V4DFmode has
46439 4567 and ABCD operands.
46440 Ignore AB67, as 67 is already in the second lane
46441 of the first operand. */
46442 if ((perm
& 0xc) == (3 << 2)) continue;
46443 /* And 45CD, as 45 is in the first lane of the first
46445 if ((perm
& 3) == 2) continue;
46446 /* And 0123, as then the vperm2[fi]128 doesn't change
46447 anything on the original 0123 first operand. */
46448 if ((perm
& 0xf) == (1 << 2)) continue;
46451 for (i
= 0; i
< nelt
; i
++)
46453 j
= d
->perm
[i
] / nelt2
;
46454 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
46455 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
46456 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
46457 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
46465 ok
= expand_vec_perm_1 (&dsecond
);
46476 /* Found a usable second shuffle. dfirst will be
46477 vperm2f128 on d->op0 and d->op1. */
46478 dsecond
.testing_p
= false;
46480 dfirst
.target
= gen_reg_rtx (d
->vmode
);
46481 for (i
= 0; i
< nelt
; i
++)
46482 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
46483 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
46485 canonicalize_perm (&dfirst
);
46486 ok
= expand_vec_perm_1 (&dfirst
);
46489 /* And dsecond is some single insn shuffle, taking
46490 d->op0 and result of vperm2f128 (if perm < 16) or
46491 d->op1 and result of vperm2f128 (otherwise). */
46493 dsecond
.op0
= dsecond
.op1
;
46494 dsecond
.op1
= dfirst
.target
;
46496 ok
= expand_vec_perm_1 (&dsecond
);
46502 /* For one operand, the only useful vperm2f128 permutation is 0x01
46504 if (d
->one_operand_p
)
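
/* Worked example, not in the original source: for V4DFmode, perm == 0x9
   encodes lane selectors {1, 2}: ((0x9 << 2) | 0x9) & 0x33 == 0x21, the
   vperm2f128 immediate that puts the high lane of op0 into the low half
   and the low lane of op1 into the high half.  */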
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
        || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V32QImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv32qi;
      else
        gen = gen_vec_interleave_lowv32qi;
      break;
    case E_V16HImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv16hi;
      else
        gen = gen_vec_interleave_lowv16hi;
      break;
    case E_V8SImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8si;
      else
        gen = gen_vec_interleave_lowv8si;
      break;
    case E_V4DImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4di;
      else
        gen = gen_vec_interleave_lowv4di;
      break;
    case E_V8SFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8sf;
      else
        gen = gen_vec_interleave_lowv8sf;
      break;
    case E_V4DFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4df;
      else
        gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx_insn *seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
        return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
        msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  if (!d->testing_p)
    dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_UNIT_SIZE (d->vmode);

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = d->target;
  if (d->vmode != V16QImode)
    op = gen_reg_rtx (V16QImode);
  emit_insn (gen_iorv16qi3 (op, l, h));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_UNIT_SIZE (d->vmode);

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation
     mask.  In the other mask the mask has non-negative elements if the
     element is requested from the other lane, but also moved to the
     other lane, so that the result of vpshufb can have the two V2TImode
     halves swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
          rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
                                  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = d->target;
  if (d->vmode != V32QImode)
    op = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_UNIT_SIZE (d->vmode);

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
          rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_reg_rtx (V4DImode);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
   with two "and" and "pack" or two "shift" and "pack" insns.  We should
   have already failed all two instruction sequences.  */

static bool
expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
{
  rtx op, dop0, dop1, t;
  unsigned i, odd, c, s, nelt = d->nelt;
  bool end_perm = false;
  machine_mode half_mode;
  rtx (*gen_and) (rtx, rtx, rtx);
  rtx (*gen_pack) (rtx, rtx, rtx);
  rtx (*gen_shift) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;

  switch (d->vmode)
    {
    case E_V8HImode:
      /* Required for "pack".  */
      if (!TARGET_SSE4_1)
        return false;
      c = 0xffff;
      s = 16;
      half_mode = V4SImode;
      gen_and = gen_andv4si3;
      gen_pack = gen_sse4_1_packusdw;
      gen_shift = gen_lshrv4si3;
      break;
    case E_V16QImode:
      /* No check as all instructions are SSE2.  */
      c = 0xff;
      s = 8;
      half_mode = V8HImode;
      gen_and = gen_andv8hi3;
      gen_pack = gen_sse2_packuswb;
      gen_shift = gen_lshrv8hi3;
      break;
    case E_V16HImode:
      if (!TARGET_AVX2)
        return false;
      c = 0xffff;
      s = 16;
      half_mode = V8SImode;
      gen_and = gen_andv8si3;
      gen_pack = gen_avx2_packusdw;
      gen_shift = gen_lshrv8si3;
      end_perm = true;
      break;
    case E_V32QImode:
      if (!TARGET_AVX2)
        return false;
      c = 0xff;
      s = 8;
      half_mode = V16HImode;
      gen_and = gen_andv16hi3;
      gen_pack = gen_avx2_packuswb;
      gen_shift = gen_lshrv16hi3;
      end_perm = true;
      break;
    default:
      /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
         general shuffles.  */
      return false;
    }

  /* Check that permutation is even or odd.  */
  odd = d->perm[0];
  if (odd > 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  if (d->testing_p)
    return true;

  dop0 = gen_reg_rtx (half_mode);
  dop1 = gen_reg_rtx (half_mode);
  if (odd == 0)
    {
      t = gen_const_vec_duplicate (half_mode, GEN_INT (c));
      t = force_reg (half_mode, t);
      emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
      emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
    }
  else
    {
      emit_insn (gen_shift (dop0,
                            gen_lowpart (half_mode, d->op0),
                            GEN_INT (s)));
      emit_insn (gen_shift (dop1,
                            gen_lowpart (half_mode, d->op1),
                            GEN_INT (s)));
    }
  /* In AVX2 for 256 bit case we need to permute pack result.  */
  if (TARGET_AVX2 && end_perm)
    {
      op = gen_reg_rtx (d->vmode);
      t = gen_reg_rtx (V4DImode);
      emit_insn (gen_pack (op, dop0, dop1));
      emit_insn (gen_avx2_permv4di_1 (t,
                                      gen_lowpart (V4DImode, op),
                                      const0_rtx, const2_rtx,
                                      const1_rtx, GEN_INT (3)));
      emit_move_insn (d->target, gen_lowpart (d->vmode, t));
    }
  else
    emit_insn (gen_pack (d->target, dop0, dop1));

  return true;
}
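
/* Illustrative sketch, not part of the original file: a scalar model of
   the even extraction above for V16QImode (C == 0xff, S == 8).  On
   little-endian x86, byte 2*i of an operand is the low byte of its
   16-bit word i, so masking each word with 0xff and packing yields
   exactly the even bytes of both inputs.  The helper name is
   hypothetical; exposition only.  */
static void
even_extract_sketch (unsigned char *dst, const unsigned short *op0,
                     const unsigned short *op1)
{
  /* pack (op0 & 0xff, op1 & 0xff) == the even bytes of both inputs.  */
  for (unsigned i = 0; i < 8; ++i)
    {
      dst[i] = op0[i] & 0xff;
      dst[i + 8] = op1[i] & 0xff;
    }
}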
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V64QI operands
   with two "shifts", two "truncs" and one "concat" insns for "odd"
   and two "truncs" and one concat insn for "even."
   Have already failed all two instruction sequences.  */

static bool
expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
{
  rtx t1, t2, t3, t4;
  unsigned i, odd, nelt = d->nelt;

  if (!TARGET_AVX512BW
      || d->one_operand_p
      || d->vmode != V64QImode)
    return false;

  /* Check that permutation is even or odd.  */
  odd = d->perm[0];
  if (odd > 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  if (d->testing_p)
    return true;

  if (odd)
    {
      t1 = gen_reg_rtx (V32HImode);
      t2 = gen_reg_rtx (V32HImode);
      emit_insn (gen_lshrv32hi3 (t1,
                                 gen_lowpart (V32HImode, d->op0),
                                 GEN_INT (8)));
      emit_insn (gen_lshrv32hi3 (t2,
                                 gen_lowpart (V32HImode, d->op1),
                                 GEN_INT (8)));
    }
  else
    {
      t1 = gen_lowpart (V32HImode, d->op0);
      t2 = gen_lowpart (V32HImode, d->op1);
    }

  t3 = gen_reg_rtx (V32QImode);
  t4 = gen_reg_rtx (V32QImode);
  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
  emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3, t4, t5;

  switch (d->vmode)
    {
    case E_V4DFmode:
      if (d->testing_p)
        break;
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1,
                                          GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1,
                                          GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case E_V8SFmode:
      {
        int mask = odd ? 0xdd : 0x88;

        if (d->testing_p)
          break;
        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
        emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
                                      GEN_INT (mask)));

        /* Shuffle the lanes around to produce:
           { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
                                            GEN_INT (0x3)));

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
        emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

        /* Shuffle within the 128-bit lanes to produce:
           { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
        emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

        /* Shuffle the lanes around to produce:
           { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
                                            GEN_INT (0x20)));
      }
      break;

    case E_V2DFmode:
    case E_V4SFmode:
    case E_V2DImode:
    case E_V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case E_V8HImode:
      if (TARGET_SSE4_1)
        return expand_vec_perm_even_odd_pack (d);
      else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          if (d->testing_p)
            break;
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave. */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case E_V16QImode:
      return expand_vec_perm_even_odd_pack (d);

    case E_V16HImode:
    case E_V32QImode:
      return expand_vec_perm_even_odd_pack (d);

    case E_V64QImode:
      return expand_vec_perm_even_odd_trunc (d);

    case E_V4DImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
          if (d->testing_p)
            d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
          else
            d_copy.target = gen_reg_rtx (V4DFmode);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          if (expand_vec_perm_even_odd_1 (&d_copy, odd))
            {
              if (!d->testing_p)
                emit_move_insn (d->target,
                                gen_lowpart (V4DImode, d_copy.target));
              return true;
            }
          return false;
        }

      if (d->testing_p)
        break;

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an vpunpck[lh]qdq will produce the result required.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
        t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case E_V8SImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
          if (d->testing_p)
            d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
          else
            d_copy.target = gen_reg_rtx (V8SFmode);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          if (expand_vec_perm_even_odd_1 (&d_copy, odd))
            {
              if (!d->testing_p)
                emit_move_insn (d->target,
                                gen_lowpart (V8SImode, d_copy.target));
              return true;
            }
          return false;
        }

      if (d->testing_p)
        break;

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);
      t3 = gen_reg_rtx (V4DImode);
      t4 = gen_reg_rtx (V4DImode);
      t5 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into
         { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
         { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now an vpunpck[lh]qdq will produce
         { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
                                           gen_lowpart (V4DImode, t2));
      else
        t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
                                          gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
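
/* Worked example, not in the original source: {0,2,4,6,...} gives
   odd == 0 and {1,3,5,7,...} gives odd == 1; anything else, e.g.
   {0,2,4,5}, fails the d->perm[i] == 2 * i + odd check and falls
   through to the other expanders.  */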
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0, dest;
  bool ok;

  switch (vmode)
    {
    case E_V4DFmode:
    case E_V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case E_V2DFmode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case E_V8HImode:
    case E_V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      if (d->testing_p)
        return true;
      do
        {
          rtx dest;
          rtx (*gen) (rtx, rtx, rtx)
            = vmode == V16QImode ? gen_vec_interleave_lowv16qi
                                 : gen_vec_interleave_lowv8hi;

          if (elt >= nelt2)
            {
              gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
                                       : gen_vec_interleave_highv8hi;
              elt -= nelt2;
            }
          nelt2 /= 2;

          dest = gen_reg_rtx (vmode);
          emit_insn (gen (dest, op0, op0));
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, dest);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      dest = gen_reg_rtx (V4SImode);
      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
      gcc_assert (ok);
      if (!d->testing_p)
        emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
      return true;

    case E_V64QImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V8SImode:
    case E_V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
         vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
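
/* Worked example, not in the original source: broadcasting byte 5 of a
   V16QImode operand: punpcklbw doubles it into word 5 (5 < 8, low
   interleave); then, with nelt2 now 4, (5 >= 4) selects the high
   interleave with elt adjusted to 1, doubling it into dword 1 of a
   V4SImode value; the final pshufd with {1,1,1,1} replicates it across
   the whole vector.  */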
/* Implement arbitrary permutations of two V64QImode operands
   with 2 vperm[it]2w, 2 vpshufb and one vpor instruction.  */

static bool
expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
{
  if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
    return false;

  if (d->testing_p)
    return true;

  struct expand_vec_perm_d ds[2];
  rtx rperm[128], vperm, target0, target1;
  unsigned int i, nelt;
  machine_mode vmode;

  nelt = d->nelt;
  vmode = V64QImode;

  for (i = 0; i < 2; i++)
    {
      ds[i] = *d;
      ds[i].vmode = V32HImode;
      ds[i].nelt = 32;
      ds[i].target = gen_reg_rtx (V32HImode);
      ds[i].op0 = gen_lowpart (V32HImode, d->op0);
      ds[i].op1 = gen_lowpart (V32HImode, d->op1);
    }

  /* Prepare permutations such that the first one takes care of
     putting the even bytes into the right positions or one higher
     positions (ds[0]) and the second one takes care of
     putting the odd bytes into the right positions or one below
     (ds[1]).  */

  for (i = 0; i < nelt; i++)
    {
      ds[i & 1].perm[i / 2] = d->perm[i] / 2;
      if (i & 1)
	{
	  rperm[i] = constm1_rtx;
	  rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
	}
      else
	{
	  rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
	  rperm[i + 64] = constm1_rtx;
	}
    }

  bool ok = expand_vec_perm_1 (&ds[0]);
  gcc_assert (ok);
  ds[0].target = gen_lowpart (V64QImode, ds[0].target);

  ok = expand_vec_perm_1 (&ds[1]);
  gcc_assert (ok);
  ds[1].target = gen_lowpart (V64QImode, ds[1].target);

  vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
  vperm = force_reg (vmode, vperm);
  target0 = gen_reg_rtx (V64QImode);
  emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));

  vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
  vperm = force_reg (vmode, vperm);
  target1 = gen_reg_rtx (V64QImode);
  emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));

  emit_insn (gen_iorv64qi3 (d->target, target0, target1));
  return true;
}
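
/* Editorial sketch (not GCC code): scalar model of the two-stage V64QI
   permutation built above.  Word index = byte index / 2 is handled by
   vperm[it]2w; the low bit selects the byte within the permuted word and
   is handled by the vpshufb masks; vpor merges the two halves.  The
   recombination (perm/2)*2 + (perm&1) == perm shows the decomposition
   is lossless.  */
#if 0
static void
perm_bytes_via_words (const unsigned char *src /* 128 source bytes */,
		      const unsigned char *perm /* 64 indices */,
		      unsigned char *dst /* 64 result bytes */)
{
  unsigned i;
  for (i = 0; i < 64; ++i)
    dst[i] = src[(perm[i] / 2) * 2 + (perm[i] & 1)];
}
#endif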
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_UNIT_SIZE (d->vmode);

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask, elements are non-negative when the element is
     requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have its two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = d->target;
  if (d->vmode != V32QImode)
    op = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));

  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d, false))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  if (expand_vec_perm_pblendv (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_even_odd_pack (d))
    return true;

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_even_odd_trunc (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  if (expand_vec_perm_vpermt2_vpshub2 (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  /* See if we can get the same permutation in a different vector integer
     mode.  */
  struct expand_vec_perm_d nd;
  if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
    {
      if (!d->testing_p)
	emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
      return true;
    }

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
	{
	  d->one_operand_p = false;
	  break;
	}
      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 operand.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
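
/* Editorial sketch (not GCC code): the folding step above in isolation.
   With identical operands, indices >= nelt are congruent modulo nelt,
   so e.g. {4,6,1,3} on a 4-element vector folds to {0,2,1,3}.  */
#if 0
static void
fold_perm_to_one_operand (unsigned char *perm, unsigned nelt)
{
  unsigned i;
  for (i = 0; i < nelt; ++i)
    perm[i] &= nelt - 1;	/* nelt is a power of two */
}
#endif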
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt;
  bool two_args;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  two_args = canonicalize_perm (&d);

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (two_args && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  switch (d.vmode)
    {
    case E_V16SFmode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V8DFmode:
      if (TARGET_AVX512F)
	/* All implementable with a single vperm[it]2 insn.  */
	return true;
      break;
    case E_V32HImode:
      if (TARGET_AVX512BW)
	/* All implementable with a single vperm[it]2 insn.  */
	return true;
      break;
    case E_V64QImode:
      if (TARGET_AVX512BW)
	/* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn.  */
	return true;
      break;
    case E_V8SImode:
    case E_V8SFmode:
    case E_V4DFmode:
    case E_V4DImode:
      if (TARGET_AVX512VL)
	/* All implementable with a single vperm[it]2 insn.  */
	return true;
      break;
    case E_V16HImode:
      if (TARGET_AVX2)
	/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
	return true;
      break;
    case E_V32QImode:
      if (TARGET_AVX2)
	/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
	return true;
      break;
    case E_V4SImode:
    case E_V4SFmode:
    case E_V8HImode:
    case E_V16QImode:
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      break;
    case E_V2DImode:
    case E_V2DFmode:
      /* All implementable with shufpd or unpck[lh]pd.  */
      return true;
    default:
      return false;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = sel[i];
      gcc_assert (e < 2 * nelt);
      d.perm[i] = e;
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
void
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
/* Expand a vector operation CODE for a V*QImode in terms of the
   same operation on V*HImode.  */

void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
  machine_mode qimode = GET_MODE (dest);
  machine_mode himode;
  rtx (*gen_il) (rtx, rtx, rtx);
  rtx (*gen_ih) (rtx, rtx, rtx);
  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
  struct expand_vec_perm_d d;
  bool ok, full_interleave;
  bool uns_p = false;
  int i;

  switch (qimode)
    {
    case E_V16QImode:
      himode = V8HImode;
      gen_il = gen_vec_interleave_lowv16qi;
      gen_ih = gen_vec_interleave_highv16qi;
      break;
    case E_V32QImode:
      himode = V16HImode;
      gen_il = gen_avx2_interleave_lowv32qi;
      gen_ih = gen_avx2_interleave_highv32qi;
      break;
    case E_V64QImode:
      himode = V32HImode;
      gen_il = gen_avx512bw_interleave_lowv64qi;
      gen_ih = gen_avx512bw_interleave_highv64qi;
      break;
    default:
      gcc_unreachable ();
    }

  op2_l = op2_h = op2;
  switch (code)
    {
    case MULT:
      /* Unpack data such that we've got a source byte in each low byte of
	 each word.  We don't care what goes into the high byte of each word.
	 Rather than trying to get zero in there, most convenient is to let
	 it be a copy of the low byte.  */
      op2_l = gen_reg_rtx (qimode);
      op2_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op2_l, op2, op2));
      emit_insn (gen_ih (op2_h, op2, op2));

      op1_l = gen_reg_rtx (qimode);
      op1_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op1_l, op1, op1));
      emit_insn (gen_ih (op1_h, op1, op1));
      full_interleave = qimode == V16QImode;
      break;

    case ASHIFT:
    case LSHIFTRT:
      uns_p = true;
      /* FALLTHRU */
    case ASHIFTRT:
      op1_l = gen_reg_rtx (himode);
      op1_h = gen_reg_rtx (himode);
      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
      full_interleave = true;
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the operation.  */
  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
			       1, OPTAB_DIRECT);
  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
			       1, OPTAB_DIRECT);
  gcc_assert (res_l && res_h);

  /* Merge the data back into the right place.  */
  d.target = dest;
  d.op0 = gen_lowpart (qimode, res_l);
  d.op1 = gen_lowpart (qimode, res_h);
  d.vmode = qimode;
  d.nelt = GET_MODE_NUNITS (qimode);
  d.one_operand_p = false;
  d.testing_p = false;

  if (full_interleave)
    {
      /* For SSE2, we used a full interleave, so the desired
	 results are in the even elements.  */
      for (i = 0; i < d.nelt; ++i)
	d.perm[i] = i * 2;
    }
  else
    {
      /* For AVX, the interleave used above was not cross-lane.  So the
	 extraction is evens but with the second and third quarter swapped.
	 Happily, that is even one insn shorter than even extraction.
	 For AVX512BW we have 4 lanes.  We extract evens from within a lane,
	 always first from the first and then from the second source operand,
	 the index bits above the low 4 bits remain the same.
	 Thus, for d.nelt == 32 we want permutation
	 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
	 and for d.nelt == 64 we want permutation
	 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
	 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126.  */
      for (i = 0; i < d.nelt; ++i)
	d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
    }

  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_fmt_ee (code, qimode, op1, op2));
}
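
/* Editorial check (not GCC code): the index formula above, evaluated for
   d.nelt == 32, prints 0,2,..,14, 32,34,..,46, 16,18,..,30, 48,50,..,62,
   matching the comment.  */
#if 0
#include <stdio.h>
int
main (void)
{
  unsigned nelt = 32, i;
  for (i = 0; i < nelt; ++i)
    printf ("%u ", ((i * 2) & 14) + ((i & 8) ? nelt : 0) + (i & ~15u));
  return 0;
}
#endif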
/* Helper function of ix86_expand_mul_widen_evenodd.  Return true
   if op is CONST_VECTOR with all odd elements equal to their
   preceding element.  */

static bool
const_vector_equal_evenodd_p (rtx op)
{
  machine_mode mode = GET_MODE (op);
  int i, nunits = GET_MODE_NUNITS (mode);
  if (GET_CODE (op) != CONST_VECTOR
      || nunits != CONST_VECTOR_NUNITS (op))
    return false;
  for (i = 0; i < nunits; i += 2)
    if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
      return false;
  return true;
}
void
ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
			       bool uns_p, bool odd_p)
{
  machine_mode mode = GET_MODE (op1);
  machine_mode wmode = GET_MODE (dest);
  rtx x;
  rtx orig_op1 = op1, orig_op2 = op2;

  if (!nonimmediate_operand (op1, mode))
    op1 = force_reg (mode, op1);
  if (!nonimmediate_operand (op2, mode))
    op2 = force_reg (mode, op2);

  /* We only play even/odd games with vectors of SImode.  */
  gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);

  /* If we're looking for the odd results, shift those members down to
     the even slots.  For some cpus this is faster than a PSHUFD.  */
  if (odd_p)
    {
      /* For XOP use vpmacsdqh, but only for smult, as it is only
	 signed.  */
      if (TARGET_XOP && mode == V4SImode && !uns_p)
	{
	  x = force_reg (wmode, CONST0_RTX (wmode));
	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
	  return;
	}

      x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
      if (!const_vector_equal_evenodd_p (orig_op1))
	op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
			    x, NULL, 1, OPTAB_DIRECT);
      if (!const_vector_equal_evenodd_p (orig_op2))
	op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
			    x, NULL, 1, OPTAB_DIRECT);
      op1 = gen_lowpart (mode, op1);
      op2 = gen_lowpart (mode, op2);
    }

  if (mode == V16SImode)
    {
      if (uns_p)
	x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
      else
	x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
    }
  else if (mode == V8SImode)
    {
      if (uns_p)
	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
      else
	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
    }
  else if (uns_p)
    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
  else if (TARGET_SSE4_1)
    x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
  else
    {
      rtx s1, s2, t0, t1, t2;

      /* The easiest way to implement this without PMULDQ is to go through
	 the motions as if we are performing a full 64-bit multiply.  With
	 the exception that we need to do less shuffling of the elements.  */

      /* Compute the sign-extension, aka highparts, of the two operands.  */
      s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op1, pc_rtx, pc_rtx);
      s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op2, pc_rtx, pc_rtx);

      /* Multiply LO(A) * HI(B), and vice-versa.  */
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));

      /* Multiply LO(A) * LO(B).  */
      t0 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));

      /* Combine and shift the highparts into place.  */
      t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
      t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
			 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
      return;
    }
  emit_insn (x);
}
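
/* Editorial sketch (not GCC code): scalar form of the PMULDQ-free signed
   multiply above.  Modulo 2^64 the signed product equals the unsigned
   product of the same bits minus (hi(a)*lo(b) + hi(b)*lo(a)) << 32, where
   hi(x) is 0 or all-ones; the sequence above adds the same correction
   because ((2^32-1)*b) << 32 == (-b) << 32 (mod 2^64).  */
#if 0
#include <stdint.h>
static int64_t
smul_via_umul (int32_t a, int32_t b)
{
  uint64_t lo = (uint64_t) (uint32_t) a * (uint32_t) b; /* pmuludq */
  uint64_t c1 = (a < 0) ? (uint32_t) b : 0;		/* hi(a) * lo(b) */
  uint64_t c2 = (b < 0) ? (uint32_t) a : 0;		/* hi(b) * lo(a) */
  return (int64_t) (lo - ((c1 + c2) << 32));
}
#endif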
void
ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
			    bool uns_p, bool high_p)
{
  machine_mode wmode = GET_MODE (dest);
  machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3, t4, mask;

  switch (mode)
    {
    case E_V4SImode:
      t1 = gen_reg_rtx (mode);
      t2 = gen_reg_rtx (mode);
      if (TARGET_XOP && !uns_p)
	{
	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
	     shuffle the elements once so that all elements are in the right
	     place for immediate use: { A C B D }.  */
	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	}
      else
	{
	  /* Put the elements into place for the multiply.  */
	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
	  high_p = false;
	}
      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
      break;

    case E_V8SImode:
      /* Shuffle the elements between the lanes.  After this we
	 have { A B E F | C D G H } for each operand.  */
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));
      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));

      /* Shuffle the elements within the lanes.  After this we
	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
      t3 = gen_reg_rtx (V8SImode);
      t4 = gen_reg_rtx (V8SImode);
      mask = GEN_INT (high_p
		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));

      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
      break;

    case E_V8HImode:
    case E_V16HImode:
      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
			 uns_p, OPTAB_DIRECT);
      t2 = expand_binop (mode,
			 uns_p ? umul_highpart_optab : smul_highpart_optab,
			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
      gcc_assert (t1 && t2);

      t3 = gen_reg_rtx (mode);
      ix86_expand_vec_interleave (t3, t1, t2, high_p);
      emit_move_insn (dest, gen_lowpart (wmode, t3));
      break;

    case E_V16QImode:
    case E_V32QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V64QImode:
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);

      emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
      break;

    default:
      gcc_unreachable ();
    }
}
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx res_1, res_2, res_3, res_4;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  res_3 = gen_reg_rtx (V2DImode);
  res_4 = gen_reg_rtx (V2DImode);
  ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
     us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
     put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
				const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
				const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
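
/* Editorial sketch (not GCC code): scalar model of the V4SI multiply
   above.  The low 32 bits of the even-index and odd-index 32x32->64
   products, interleaved, give the element-wise product.  */
#if 0
#include <stdint.h>
static void
mulv4si_model (const uint32_t a[4], const uint32_t b[4], uint32_t r[4])
{
  r[0] = (uint32_t) ((uint64_t) a[0] * b[0]); /* even product, low half */
  r[1] = (uint32_t) ((uint64_t) a[1] * b[1]); /* odd product, low half */
  r[2] = (uint32_t) ((uint64_t) a[2] * b[2]);
  r[3] = (uint32_t) ((uint64_t) a[3] * b[3]);
}
#endif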
void
ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
{
  machine_mode mode = GET_MODE (op0);
  rtx t1, t2, t3, t4, t5, t6;

  if (TARGET_AVX512DQ && mode == V8DImode)
    emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
  else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
    emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
  else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
    emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
  else if (TARGET_XOP && mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
				    GEN_INT (1), GEN_INT (0),
				    GEN_INT (3), GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* Multiply lower parts and add all.  */
      t5 = gen_reg_rtx (V2DImode);
      emit_insn (gen_vec_widen_umult_even_v4si (t5,
						gen_lowpart (V4SImode, op1),
						gen_lowpart (V4SImode, op2)));
      op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
    }
  else
    {
      machine_mode nmode;
      rtx (*umul) (rtx, rtx, rtx);

      if (mode == V2DImode)
	{
	  umul = gen_vec_widen_umult_even_v4si;
	  nmode = V4SImode;
	}
      else if (mode == V4DImode)
	{
	  umul = gen_vec_widen_umult_even_v8si;
	  nmode = V8SImode;
	}
      else if (mode == V8DImode)
	{
	  umul = gen_vec_widen_umult_even_v16si;
	  nmode = V16SImode;
	}
      else
	gcc_unreachable ();

      /* Multiply low parts.  */
      t1 = gen_reg_rtx (mode);
      emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      t6 = GEN_INT (32);
      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);

      /* Multiply high parts by low parts.  */
      t4 = gen_reg_rtx (mode);
      t5 = gen_reg_rtx (mode);
      emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
      emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));

      /* Combine and shift the highparts back.  */
      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MULT (mode, op1, op2));
}
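
/* Editorial sketch (not GCC code): scalar form of the generic V*DI
   multiply above.  With a = ah*2^32 + al and b = bh*2^32 + bl,
   a*b mod 2^64 = al*bl + ((ah*bl + bh*al) << 32): three widening
   multiplies, two shifts and two additions.  */
#if 0
#include <stdint.h>
static uint64_t
mul64_via_32 (uint64_t a, uint64_t b)
{
  uint64_t al = (uint32_t) a, ah = a >> 32;
  uint64_t bl = (uint32_t) b, bh = b >> 32;
  return al * bl + ((ah * bl + bh * al) << 32);
}
#endif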
/* Return 1 if control transfer instruction INSN
   should be encoded with bnd prefix.
   If insn is NULL then return 1 when control
   transfer instructions should be prefixed with
   bnd by default for current function.  */

bool
ix86_bnd_prefixed_insn_p (rtx insn)
{
  /* For call insns check special flag.  */
  if (insn && CALL_P (insn))
    {
      rtx call = get_call_rtx_from (insn);
      if (call)
	return CALL_EXPR_WITH_BOUNDS_P (call);
    }

  /* All other insns are prefixed only if function is instrumented.  */
  return chkp_function_instrumented_p (current_function_decl);
}
/* Return 1 if control transfer instruction INSN
   should be encoded with notrack prefix.  */

static bool
ix86_notrack_prefixed_insn_p (rtx insn)
{
  if (!insn || !((flag_cf_protection & CF_BRANCH) && TARGET_IBT))
    return false;

  if (CALL_P (insn))
    {
      rtx call = get_call_rtx_from (insn);
      gcc_assert (call != NULL_RTX);
      rtx addr = XEXP (call, 0);

      /* Do not emit 'notrack' if it's not an indirect call.  */
      if (MEM_P (addr)
	  && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
	return false;
      else
	return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
    }

  if (JUMP_P (insn) && !flag_cet_switch)
    {
      rtx target = JUMP_LABEL (insn);
      if (target == NULL_RTX || ANY_RETURN_P (target))
	return false;

      /* Check the jump is a switch table.  */
      rtx_insn *label = as_a<rtx_insn *> (target);
      rtx_insn *table = next_insn (label);
      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
	return false;
      else
	return true;
    }
  return false;
}
/* Calculate integer abs() using only SSE2 instructions.  */

static void
ix86_expand_sse2_abs (rtx target, rtx input)
{
  machine_mode mode = GET_MODE (target);
  rtx tmp0, tmp1, x;

  switch (mode)
    {
    /* For 32-bit signed integer X, the best way to calculate the absolute
       value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
    case E_V4SImode:
      tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
				  GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
				  NULL, 0, OPTAB_DIRECT);
      tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
				  NULL, 0, OPTAB_DIRECT);
      x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
			       target, 0, OPTAB_DIRECT);
      break;

    /* For 16-bit signed integer X, the best way to calculate the absolute
       value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
    case E_V8HImode:
      tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);

      x = expand_simple_binop (mode, SMAX, tmp0, input,
			       target, 0, OPTAB_DIRECT);
      break;

    /* For 8-bit signed integer X, the best way to calculate the absolute
       value of X is min ((unsigned char) X, (unsigned char) (-X)),
       as SSE2 provides the PMINUB insn.  */
    case E_V16QImode:
      tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);

      x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
			       target, 0, OPTAB_DIRECT);
      break;

    default:
      gcc_unreachable ();
    }

  if (x != target)
    emit_move_insn (target, x);
}
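
/* Editorial check (not GCC code): the 32-bit identity used above,
   abs(x) == ((x >> 31) ^ x) - (x >> 31) with an arithmetic shift;
   the shift yields 0 or -1, so the xor/sub pair performs a branchless
   conditional negate.  */
#if 0
static int
abs_via_shift (int x)
{
  int s = x >> 31;		/* 0 if x >= 0, -1 if x < 0 */
  return (x ^ s) - s;
}
#endif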
/* Expand an extract from a vector register through pextr insn.
   Return true if successful.  */

bool
ix86_expand_pextr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  unsigned int size = INTVAL (operands[2]);
  unsigned int pos = INTVAL (operands[3]);

  if (SUBREG_P (dst))
    {
      /* Reject non-lowpart subregs.  */
      if (SUBREG_BYTE (dst) > 0)
	return false;
      dst = SUBREG_REG (dst);
    }

  if (SUBREG_P (src))
    {
      pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
      src = SUBREG_REG (src);
    }

  switch (GET_MODE (src))
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V1TImode:
    case E_TImode:
      {
	machine_mode srcmode, dstmode;
	rtx d, pat;

	if (!int_mode_for_size (size, 0).exists (&dstmode))
	  return false;

	switch (dstmode)
	  {
	  case E_QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    srcmode = V16QImode;
	    break;

	  case E_HImode:
	    if (!TARGET_SSE2)
	      return false;
	    srcmode = V8HImode;
	    break;

	  case E_SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    srcmode = V4SImode;
	    break;

	  case E_DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    srcmode = V2DImode;
	    break;

	  default:
	    return false;
	  }

	/* Reject extractions from misaligned positions.  */
	if (pos & (size - 1))
	  return false;

	if (GET_MODE (dst) == dstmode)
	  d = dst;
	else
	  d = gen_reg_rtx (dstmode);

	/* Construct insn pattern.  */
	pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
	pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);

	/* Let the rtl optimizers know about the zero extension performed.  */
	if (dstmode == QImode || dstmode == HImode)
	  {
	    pat = gen_rtx_ZERO_EXTEND (SImode, pat);
	    d = gen_lowpart (SImode, d);
	  }

	emit_insn (gen_rtx_SET (d, pat));

	if (d != dst)
	  emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
	return true;
      }

    default:
      return false;
    }
}
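
/* Editorial sketch (not GCC code): the position arithmetic shared by the
   pextr/pinsr expanders.  A SIZE-bit access at bit POS maps to vector
   element POS / SIZE and is only supported when POS is SIZE-aligned.  */
#if 0
static int
pextr_element_index (unsigned pos, unsigned size)
{
  if (pos & (size - 1))
    return -1;			/* misaligned: the expanders punt */
  return pos / size;		/* immediate operand of pextr/pinsr */
}
#endif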
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (SUBREG_P (dst))
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  switch (GET_MODE (dst))
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V1TImode:
    case E_TImode:
      {
	machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);
	rtx d;

	if (!int_mode_for_size (size, 0).exists (&srcmode))
	  return false;

	switch (srcmode)
	  {
	  case E_QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case E_HImode:
	    if (!TARGET_SSE2)
	      return false;
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case E_SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case E_DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	/* Reject insertions to misaligned positions.  */
	if (pos & (size - 1))
	  return false;

	if (SUBREG_P (src))
	  {
	    unsigned int srcpos = SUBREG_BYTE (src);

	    if (srcpos > 0)
	      {
		rtx extr_ops[4];

		extr_ops[0] = gen_reg_rtx (srcmode);
		extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
		extr_ops[2] = GEN_INT (size);
		extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);

		if (!ix86_expand_pextr (extr_ops))
		  return false;

		src = extr_ops[0];
	      }
	    else
	      src = gen_lowpart (srcmode, SUBREG_REG (src));
	  }

	if (GET_MODE (dst) == dstmode)
	  d = dst;
	else
	  d = gen_reg_rtx (dstmode);

	emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
			  gen_lowpart (srcmode, src),
			  GEN_INT (1 << (pos / size))));

	if (d != dst)
	  emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
	return true;
      }

    default:
      return false;
    }
}
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  if (TARGET_64BIT)
    {
      if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
	return ms_va_list_type_node;

      if ((TREE_CODE (type) == ARRAY_TYPE
	   && integer_zerop (array_type_nelts (type)))
	  || POINTER_TYPE_P (type))
	{
	  tree elem_type = TREE_TYPE (type);
	  if (TREE_CODE (elem_type) == RECORD_TYPE
	      && lookup_attribute ("sysv_abi va_list",
				   TYPE_ATTRIBUTES (elem_type)))
	    return sysv_va_list_type_node;
	}
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
	{
	default:
	  break;

	case 0:
	  *ptree = ms_va_list_type_node;
	  *pname = "__builtin_ms_va_list";
	  return 1;

	case 1:
	  *ptree = sysv_va_list_type_node;
	  *pname = "__builtin_sysv_va_list";
	  return 1;
	}
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.  */

static int
ix86_reassociation_width (unsigned int op, machine_mode mode)
{
  int width = 1;
  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      int div = 1;
      if (INTEGRAL_MODE_P (mode))
	width = ix86_cost->reassoc_vec_int;
      else if (FLOAT_MODE_P (mode))
	width = ix86_cost->reassoc_vec_fp;

      if (width == 1)
	return 1;

      /* Integer vector instructions execute in FP unit
	 and can execute 3 additions and one multiplication per cycle.  */
      if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
	  && op != PLUS && op != MINUS)
	return 1;

      /* Account for targets that split wide vectors into multiple parts.  */
      if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
	div = GET_MODE_BITSIZE (mode) / 128;
      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
	div = GET_MODE_BITSIZE (mode) / 64;
      width = (width + div - 1) / div;
    }
  /* Scalar part.  */
  else if (INTEGRAL_MODE_P (mode))
    width = ix86_cost->reassoc_int;
  else if (FLOAT_MODE_P (mode))
    width = ix86_cost->reassoc_fp;

  /* Avoid using too many registers in 32bit mode.  */
  if (!TARGET_64BIT && width > 2)
    width = 2;
  return width;
}
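
/* Editorial sketch (not GCC code): the width scaling above in isolation.
   Targets that split wide vectors internally get a proportionally
   smaller reassociation width, rounded up.  */
#if 0
static int
scale_reassoc_width (int width, int mode_bits, int native_bits)
{
  int div = 1;
  if (mode_bits > native_bits)
    div = mode_bits / native_bits;  /* e.g. 256-bit ops on 128-bit units */
  return (width + div - 1) / div;
}
#endif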
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V32QImode;
      else
	return V16QImode;

    case E_HImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16HImode;
      else
	return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SImode;
      else
	return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DImode;
      else
	return V2DImode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  unsigned int bytesizes = 0;

  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    bytesizes |= (64 | 32 | 16);
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    bytesizes |= (32 | 16);

  return bytesizes;
}
/* Implementation of targetm.vectorize.get_mask_mode.  */

static opt_machine_mode
ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
{
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
    {
      if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
	return smallest_int_mode_for_size (nunits);
    }

  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
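
/* Editorial sketch (not GCC code): the scalar-mask rule above as a
   standalone predicate.  Returns the number of mask bits when a scalar
   (k-register style) mask applies, 0 when a vector mask must be used.  */
#if 0
static unsigned
scalar_mask_nbits (unsigned nunits, unsigned vector_size,
		   int avx512f, int avx512vl, int avx512bw)
{
  unsigned elem_size = vector_size / nunits;
  if ((avx512f && vector_size == 64)
      || (avx512vl && (vector_size == 32 || vector_size == 16)))
    if (elem_size == 4 || elem_size == 8 || avx512bw)
      return nunits;		/* one mask bit per element */
  return 0;
}
#endif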
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
   but returns a lower bound.  */

static unsigned int
ix86_max_noce_ifcvt_seq_cost (edge e)
{
  bool predictable_p = predictable_edge_p (e);

  enum compiler_param param
    = (predictable_p
       ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
       : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);

  /* If we have a parameter set, use that, otherwise take a guess using
     BRANCH_COST.  */
  if (global_options_set.x_param_values[param])
    return PARAM_VALUE (param);
  else
    return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
}
/* Return true if SEQ is a good candidate as a replacement for the
   if-convertible sequence described in IF_INFO.  */

static bool
ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    {
      int cmov_cnt = 0;
      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
	 Maybe we should allow even more conditional moves as long as they
	 are used far enough not to stall the CPU, or also consider
	 IF_INFO->TEST_BB succ edge probabilities.  */
      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
	{
	  rtx set = single_set (insn);
	  if (!set)
	    continue;
	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
	    continue;
	  rtx src = SET_SRC (set);
	  machine_mode mode = GET_MODE (src);
	  if (GET_MODE_CLASS (mode) != MODE_INT
	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
	    continue;
	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
	    continue;
	  /* insn is CMOV or FCMOV.  */
	  if (++cmov_cnt > 1)
	    return false;
	}
    }
  return default_noce_conversion_profitable_p (seq, if_info);
}
/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (struct loop *)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}
49072 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
49073 struct _stmt_vec_info
*stmt_info
, int misalign
,
49074 enum vect_cost_model_location where
)
49076 unsigned *cost
= (unsigned *) data
;
49077 unsigned retval
= 0;
49079 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
49080 int stmt_cost
= - 1;
49082 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
49084 && stmt_info
->stmt
&& gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
49086 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
49088 machine_mode mode
= TImode
;
49090 if (vectype
!= NULL
)
49092 fp
= FLOAT_TYPE_P (vectype
);
49093 mode
= TYPE_MODE (vectype
);
49095 /*machine_mode inner_mode = mode;
49096 if (VECTOR_MODE_P (mode))
49097 inner_mode = GET_MODE_INNER (mode);*/
49102 case POINTER_PLUS_EXPR
:
49104 if (kind
== scalar_stmt
)
49106 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
49107 stmt_cost
= ix86_cost
->addss
;
49108 else if (X87_FLOAT_MODE_P (mode
))
49109 stmt_cost
= ix86_cost
->fadd
;
49111 stmt_cost
= ix86_cost
->add
;
49114 stmt_cost
= ix86_vec_cost (mode
,
49115 fp
? ix86_cost
->addss
49116 : ix86_cost
->sse_op
,
49121 case WIDEN_MULT_EXPR
:
49122 case MULT_HIGHPART_EXPR
:
49123 stmt_cost
= ix86_multiplication_cost (ix86_cost
, mode
);
49126 stmt_cost
= ix86_vec_cost (mode
,
49127 mode
== SFmode
? ix86_cost
->fmass
49128 : ix86_cost
->fmasd
,
49132 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
49133 stmt_cost
= ix86_cost
->sse_op
;
49134 else if (X87_FLOAT_MODE_P (mode
))
49135 stmt_cost
= ix86_cost
->fchs
;
49136 else if (VECTOR_MODE_P (mode
))
49137 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
, true);
49139 stmt_cost
= ix86_cost
->add
;
49141 case TRUNC_DIV_EXPR
:
49142 case CEIL_DIV_EXPR
:
49143 case FLOOR_DIV_EXPR
:
49144 case ROUND_DIV_EXPR
:
49145 case TRUNC_MOD_EXPR
:
49146 case CEIL_MOD_EXPR
:
49147 case FLOOR_MOD_EXPR
:
49149 case ROUND_MOD_EXPR
:
49150 case EXACT_DIV_EXPR
:
49151 stmt_cost
= ix86_division_cost (ix86_cost
, mode
);
49159 tree op2
= gimple_assign_rhs2 (stmt_info
->stmt
);
49160 stmt_cost
= ix86_shift_rotate_cost
49162 TREE_CODE (op2
) == INTEGER_CST
,
49163 cst_and_fits_in_hwi (op2
) ? int_cst_value (op2
) : -1,
49164 true, false, false, NULL
, NULL
);
49178 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
49179 stmt_cost
= ix86_cost
->sse_op
;
49180 else if (VECTOR_MODE_P (mode
))
49181 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
, true);
49183 stmt_cost
= ix86_cost
->add
;
  if (stmt_cost == -1)
    stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

  /* Penalize DFmode vector operations for Bonnell.  */
  if (TARGET_BONNELL && kind == vector_stmt
      && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
    stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
    count *= 50;  /* FIXME.  */

  retval = (unsigned) (count * stmt_cost);
  /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
     for Silvermont as it has an out-of-order integer pipeline and can execute
     2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
  if ((TARGET_SILVERMONT || TARGET_INTEL)
      && stmt_info && stmt_info->stmt)
    {
      tree lhs_op = gimple_get_lhs (stmt_info->stmt);
      if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
	retval = (retval * 17) / 10;
    }

  cost[where] += retval;

  return retval;
}
/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
		  unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}
/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  enum memmodel model = memmodel_from_int (val);
  bool strong;

  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
				      |MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
	       "unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
  if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
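
/* Editorial sketch (not GCC code; parameters generalize the target
   macros above): the shape of the HLE validation.  The HLE bits live
   above the memory-model mask, must not be combined, and each needs a
   compatible base model.  */
#if 0
static int
hle_bits_wellformed (unsigned val, unsigned model_mask,
		     unsigned hle_acq, unsigned hle_rel)
{
  if (val & ~(hle_acq | hle_rel | model_mask))
    return 0;				/* unknown target bits */
  return !((val & hle_acq) && (val & hle_rel)); /* mutually exclusive */
}
#endif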
/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   or number of vecsize_mangle variants that should be emitted.  */

static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
					     struct cgraph_simd_clone *clonei,
					     tree base_type, int num)
{
  int ret = 1;

  if (clonei->simdlen
      && (clonei->simdlen < 2
	  || clonei->simdlen > 1024
	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		  "unsupported simdlen %d", clonei->simdlen);
      return 0;
    }
49295 tree ret_type
= TREE_TYPE (TREE_TYPE (node
->decl
));
49296 if (TREE_CODE (ret_type
) != VOID_TYPE
)
49297 switch (TYPE_MODE (ret_type
))
49305 /* case E_SCmode: */
49306 /* case E_DCmode: */
49309 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
49310 "unsupported return type %qT for simd\n", ret_type
);
49317 for (t
= DECL_ARGUMENTS (node
->decl
), i
= 0; t
; t
= DECL_CHAIN (t
), i
++)
49318 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
49319 switch (TYPE_MODE (TREE_TYPE (t
)))
49327 /* case E_SCmode: */
49328 /* case E_DCmode: */
49331 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
49332 "unsupported argument type %qT for simd\n", TREE_TYPE (t
));
49336 if (!TREE_PUBLIC (node
->decl
))
      /* If the function isn't exported, we can pick up just one ISA
	 for the clones.  */
49340 if (TARGET_AVX512F
)
49341 clonei
->vecsize_mangle
= 'e';
49342 else if (TARGET_AVX2
)
49343 clonei
->vecsize_mangle
= 'd';
49344 else if (TARGET_AVX
)
49345 clonei
->vecsize_mangle
= 'c';
49347 clonei
->vecsize_mangle
= 'b';
49352 clonei
->vecsize_mangle
= "bcde"[num
];
49355 clonei
->mask_mode
= VOIDmode
;
49356 switch (clonei
->vecsize_mangle
)
49359 clonei
->vecsize_int
= 128;
49360 clonei
->vecsize_float
= 128;
49363 clonei
->vecsize_int
= 128;
49364 clonei
->vecsize_float
= 256;
49367 clonei
->vecsize_int
= 256;
49368 clonei
->vecsize_float
= 256;
49371 clonei
->vecsize_int
= 512;
49372 clonei
->vecsize_float
= 512;
49373 if (TYPE_MODE (base_type
) == QImode
)
49374 clonei
->mask_mode
= DImode
;
49376 clonei
->mask_mode
= SImode
;
49379 if (clonei
->simdlen
== 0)
49381 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type
)))
49382 clonei
->simdlen
= clonei
->vecsize_int
;
49384 clonei
->simdlen
= clonei
->vecsize_float
;
49385 clonei
->simdlen
/= GET_MODE_BITSIZE (TYPE_MODE (base_type
));
49387 else if (clonei
->simdlen
> 16)
      /* For compatibility with ICC, use the same upper bounds
	 for simdlen.  In particular, for CTYPE below, use the return type,
	 unless the function returns void, in which case use the
	 characteristic type.  If it is possible for the given SIMDLEN to
	 pass the CTYPE value in registers (8 [XYZ]MM* regs for 32-bit code,
	 16 [XYZ]MM* regs for 64-bit code), accept that SIMDLEN, otherwise
	 warn and don't emit the corresponding clone.  */
49396 tree ctype
= ret_type
;
49397 if (TREE_CODE (ret_type
) == VOID_TYPE
)
49399 int cnt
= GET_MODE_BITSIZE (TYPE_MODE (ctype
)) * clonei
->simdlen
;
49400 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype
)))
49401 cnt
/= clonei
->vecsize_int
;
49403 cnt
/= clonei
->vecsize_float
;
49404 if (cnt
> (TARGET_64BIT
? 16 : 8))
49406 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
49407 "unsupported simdlen %d", clonei
->simdlen
);
49414 /* Add target attribute to SIMD clone NODE if needed. */
49417 ix86_simd_clone_adjust (struct cgraph_node
*node
)
49419 const char *str
= NULL
;
49420 gcc_assert (node
->decl
== cfun
->decl
);
49421 switch (node
->simdclone
->vecsize_mangle
)
49436 if (!TARGET_AVX512F
)
49440 gcc_unreachable ();
49445 tree args
= build_tree_list (NULL_TREE
, build_string (strlen (str
), str
));
49446 bool ok
= ix86_valid_target_attribute_p (node
->decl
, NULL
, args
, 0);
49449 ix86_reset_previous_fndecl ();
49450 ix86_set_current_function (node
->decl
);
49453 /* If SIMD clone NODE can't be used in a vectorized loop
49454 in current function, return -1, otherwise return a badness of using it
49455 (0 if it is most desirable from vecsize_mangle point of view, 1
49456 slightly less desirable, etc.). */
49459 ix86_simd_clone_usable (struct cgraph_node
*node
)
49461 switch (node
->simdclone
->vecsize_mangle
)
49468 return TARGET_AVX2
? 2 : 1;
49472 return TARGET_AVX2
? 1 : 0;
49478 if (!TARGET_AVX512F
)
49482 gcc_unreachable ();
/* This function adjusts the unroll factor based on
   the hardware capabilities.  For example, bdver3 has
   a loop buffer which makes unrolling of smaller
   loops less important.  This function decides the
   unroll factor using the number of memory references
   (value 32 is used) as a heuristic.  */

static unsigned
ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (!TARGET_ADJUST_UNROLL)
     return nunroll;

  /* Count the number of memory references within the loop body.
     This value determines the unrolling factor for bdver3 and bdver4
     architectures.  */
  subrtx_iterator::array_type array;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (NONDEBUG_INSN_P (insn))
	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
	  if (const_rtx x = *iter)
	    if (MEM_P (x))
	      {
		machine_mode mode = GET_MODE (x);
		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
		if (n_words > 4)
		  mem_count += 2;
		else
		  mem_count += 1;
	      }
  free (bbs);

  if (mem_count && mem_count <= 32)
    return 32 / mem_count;

  return nunroll;
}
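
/* Editorial sketch (not GCC code): the heuristic above in isolation.
   The unroll factor is chosen so the unrolled body stays near 32 memory
   references; loops already above that keep the requested factor.  */
#if 0
static unsigned
adjust_unroll_for_mem_refs (unsigned nunroll, unsigned mem_count)
{
  if (mem_count && mem_count <= 32)
    return 32 / mem_count;
  return nunroll;
}
#endif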
/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */

static bool
ix86_float_exceptions_rounding_supported_p (void)
{
  /* For x87 floating point with standard excess precision handling,
     there is no adddf3 pattern (since x87 floating point only has
     XFmode operations) so the default hook implementation gets this
     wrong.  */
  return TARGET_80387 || TARGET_SSE_MATH;
}
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */

static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_80387 && !TARGET_SSE_MATH)
    return;
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      tree fenv_index_type = build_index_type (size_int (6));
      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
      tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
      tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
      tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
			 NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
		      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
			    sw_var, fnstsw_call);
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
				exceptions_var, exceptions_x87);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
			sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
    }
  if (TARGET_SSE_MATH)
    {
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
      tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
				      mxcsr_orig_var, stmxcsr_hold_call);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
				  mxcsr_orig_var,
				  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
			     build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
				     mxcsr_mod_var, hold_mod_val);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
			      hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
			 ldmxcsr_hold_call);
      if (*hold)
	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
	*hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
			 ldmxcsr_clear_call);
      else
	*clear = ldmxcsr_clear_call;
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
					  stxmcsr_update_call);
      if (*update)
	{
	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
					exceptions_var, exceptions_sse);
	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
					   exceptions_var, exceptions_mod);
	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
			    exceptions_assign);
	}
      else
	*update = build2 (MODIFY_EXPR, integer_type_node,
			  exceptions_var, exceptions_sse);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
			ldmxcsr_update_call);
    }
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
						    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
		    atomic_feraiseexcept_call);
}
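
/* Editorial sketch (not GCC code): the MXCSR arithmetic used in the
   SSE "hold" sequence above.  0x1f80 sets all exception mask bits;
   0xffffffc0 then clears the six sticky exception flags.  */
#if 0
static unsigned
mxcsr_hold_value (unsigned mxcsr_orig)
{
  return (mxcsr_orig | 0x1f80) & 0xffffffc0;
}
#endif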
/* Return mode to be used for bounds or VOIDmode
   if bounds are not supported.  */

static machine_mode
ix86_mpx_bound_mode ()
{
  /* Do not support pointer checker if MPX
     is not enabled.  */
  if (!TARGET_MPX)
    {
      if (flag_check_pointer_bounds)
	warning (0, "Pointer Checker requires MPX support on this target."
		 " Use -mmpx options to enable MPX.");
      return VOIDmode;
    }

  return BNDmode;
}
/* Return constant used to statically initialize constant bounds.

   This function is used to create special bound values.  For now
   only INIT bounds and NONE bounds are expected.  More special
   values may be added later.  */

static tree
ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
{
  tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
		: build_zero_cst (pointer_sized_int_node);
  tree high = ub ? build_zero_cst (pointer_sized_int_node)
		 : build_minus_one_cst (pointer_sized_int_node);

  /* This function is supposed to be used to create INIT and
     NONE bounds only.  */
  gcc_assert ((lb == 0 && ub == -1)
	      || (lb == -1 && ub == 0));

  return build_complex (NULL, low, high);
}
/* Generate a list of statements STMTS to initialize pointer bounds
   variable VAR with bounds LB and UB.  Return the number of generated
   statements.  */

static int
ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
{
  tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
  tree lhs, modify, var_p;

  ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
  var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));

  lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
  modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
  append_to_statement_list (modify, stmts);

  lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
		build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
			TYPE_SIZE_UNIT (pointer_sized_int_node)));
  modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
  append_to_statement_list (modify, stmts);

  return 2;
}
#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, common symbol is local only for non-PIE binaries.  For
   x86-64, common symbol is local only for non-PIE binaries or if the
   linker supports copy reloc in PIE binaries.  */

static bool
ix86_binds_local_p (const_tree exp)
{
  return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
				  (!flag_pic
				   || (TARGET_64BIT
				       && HAVE_LD_PIE_COPYRELOC != 0)));
}
#endif
/* If MEM is in the form of [base+offset], extract the two parts
   of the address and set them to BASE and OFFSET, otherwise return
   false.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && (REG_P (XEXP (addr, 0))
	  || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  return false;
}
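
/* For example (illustrative RTL), the following all succeed:

     (mem (reg:DI ax))                       -> base = ax,  offset = 0
     (mem (symbol_ref "x"))                  -> base = "x", offset = 0
     (mem (plus (reg:DI bx) (const_int 8)))  -> base = bx,  offset = 8

   while an indexed or scaled address such as (plus (reg) (mult ...))
   makes the function return false.  */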
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into move multiple.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
				    machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  gcc_assert (REG_P (reg_1) && REG_P (reg_2));

  if (REGNO (reg_1) != REGNO (reg_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
    return false;
  if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);

  /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
  if (offval_1 + msize != offval_2)
    return false;

  return true;
}
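
/* For instance (an illustrative pair), merging

     movsd   8(%rsp), %xmm0
     movhpd  16(%rsp), %xmm0

   into one unaligned move requires both destinations to be the same
   register, the same base (%rsp here), and, with DFmode's msize of 8,
   offsets satisfying 8 + 8 == 16 -- exactly the checks above.  */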
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
			optimization_type opt_type)
{
  switch (op)
    {
    case asin_optab:
    case acos_optab:
    case log1p_optab:
    case exp_optab:
    case exp10_optab:
    case exp2_optab:
    case expm1_optab:
    case ldexp_optab:
    case scalb_optab:
    case round_optab:
      /* These x87 sequences are only worthwhile when optimizing for
	 speed.  */
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rint_optab:
      if (SSE_FLOAT_MODE_P (mode1)
	  && TARGET_SSE_MATH
	  && !flag_trapping_math
	  && !TARGET_SSE4_1)
	return opt_type == OPTIMIZE_FOR_SPEED;
      return true;

    case floor_optab:
    case ceil_optab:
    case btrunc_optab:
      if (SSE_FLOAT_MODE_P (mode1)
	  && TARGET_SSE_MATH
	  && !flag_trapping_math
	  && TARGET_SSE4_1)
	return true;
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rsqrt_optab:
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();

    default:
      return true;
    }
}
/* Address space support.

   This is not "far pointers" in the 16-bit sense, but an easy way
   to use %fs and %gs segment prefixes.  Therefore:

    (a) All address spaces have the same modes,
    (b) All address spaces have the same address forms,
    (c) While %fs and %gs are technically subsets of the generic
	address space, they are probably not subsets of each other.
    (d) Since we have no access to the segment base register values
	without resorting to a system call, we cannot convert a
	non-default address space to a default address space.
	Therefore we do not claim %fs or %gs are subsets of generic.

   Therefore we can (mostly) use the default hooks.  */

/* All use of segmentation is assumed to make address 0 valid.  */

static bool
ix86_addr_space_zero_address_valid (addr_space_t as)
{
  return as != ADDR_SPACE_GENERIC;
}
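
/* For example (a sketch using the named address spaces this port
   exposes), an access such as

     int x = *(volatile int __seg_gs *) 0;

   is addressed relative to the %gs base, so address 0 is perfectly
   legitimate there; the hook above therefore only treats the generic
   address space as having an invalid zero address.  */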
static void
ix86_init_libfuncs (void)
{
  if (TARGET_64BIT)
    {
      set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
      set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
    }
  else
    {
      set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
      set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
    }

#if TARGET_MACHO
  darwin_rename_builtins ();
#endif
}
/* Generate call to __divmoddi4.  */

static void
ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			    rtx op0, rtx op1,
			    rtx *quot_p, rtx *rem_p)
{
  rtx rem = assign_386_stack_local (mode, SLOT_TEMP);

  rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
				      mode, op0, GET_MODE (op0),
				      op1, GET_MODE (op1),
				      XEXP (rem, 0), Pmode);
  *quot_p = quot;
  *rem_p = rem;
}
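
/* In effect this expands a combined division/modulus into a single
   libcall with the libgcc prototype (shown for the DImode case):

     long long __divmoddi4 (long long a, long long b, long long *rem);

   The quotient comes back in the return value, and the remainder is
   written through the third argument, which here points at a stack
   temporary.  */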
/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
	 in a standards compliant mode, and the implicit precision we
	 provide would be identical were it not for the unpredictable
	 cases.  */
      if (!TARGET_80387)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
	{
	  if (!TARGET_SSE_MATH)
	    return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	  else if (TARGET_SSE2)
	    return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	}

      /* If we are in standards compliant mode, but we know we will
	 calculate in unpredictable precision, return
	 FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	 excess precision if the target can't guarantee it will honor
	 it.  */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
	      : FLT_EVAL_METHOD_UNPREDICTABLE);
    default:
      gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
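
/* Concretely (a sketch of the user-visible effect): when math is done
   on the x87 FPU only, e.g. ia32 with -mfpmath=387,

     float f (float a, float b, float c) { return a * b + c; }

   evaluates the intermediate a * b in long double, matching the
   FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE case above, whereas with SSE2
   math the arithmetic happens in plain float.  */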
/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {
/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}
/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show reuse
     IDs.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}
/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated.  */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));

      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}
/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* Verify that the "/j" flag was parsed.  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".  */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}
/* Verify that the RTL loader copes with a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}
/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE (-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}
/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}

} // namespace selftest

#endif /* CHECKING_P */
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_VECTORIZE_BUILTIN_SCATTER
#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_excess_precision
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

#if TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
#define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */
struct gcc_target targetm = TARGET_INITIALIZER;
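
/* Note on the mechanism above (an explanatory sketch, not new
   behavior): "target-def.h", included near the top of this file,
   expands TARGET_INITIALIZER into an aggregate initializer with one
   field per hook, using the current definition of each TARGET_* macro.
   Each #undef/#define pair therefore overrides exactly one field,
   conceptually:

     struct gcc_target targetm = { ..., ix86_return_in_memory, ... };

   Hooks not redefined in this file keep the defaults supplied by
   target-def.h.  */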
#include "gt-i386.h"