1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2017 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "params.h"
63 #include "cselib.h"
64 #include "sched-int.h"
65 #include "opts.h"
66 #include "tree-pass.h"
67 #include "context.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "tree-chkp.h"
77 #include "rtl-chkp.h"
78 #include "dbgcnt.h"
79 #include "case-cfn-macros.h"
80 #include "regrename.h"
81 #include "dojump.h"
82 #include "fold-const-call.h"
83 #include "tree-vrp.h"
84 #include "tree-ssanames.h"
85 #include "selftest.h"
86 #include "selftest-rtl.h"
87 #include "print-rtl.h"
88 #include "intl.h"
89 #include "ifcvt.h"
90 #include "symbol-summary.h"
91 #include "ipa-prop.h"
92 #include "ipa-fnsummary.h"
93
94 /* This file should be included last. */
95 #include "target-def.h"
96
97 #include "x86-tune-costs.h"
98
99 static rtx legitimize_dllimport_symbol (rtx, bool);
100 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
101 static rtx legitimize_pe_coff_symbol (rtx, bool);
102 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
103 static bool ix86_save_reg (unsigned int, bool, bool);
104 static bool ix86_function_naked (const_tree);
105 static bool ix86_notrack_prefixed_insn_p (rtx);
106 static void ix86_emit_restore_reg_using_pop (rtx);
107
108
109 #ifndef CHECK_STACK_LIMIT
110 #define CHECK_STACK_LIMIT (-1)
111 #endif
112
 113 /* Return the index of the given mode in the multiply and divide cost tables. */
114 #define MODE_INDEX(mode) \
115 ((mode) == QImode ? 0 \
116 : (mode) == HImode ? 1 \
117 : (mode) == SImode ? 2 \
118 : (mode) == DImode ? 3 \
119 : 4)
120
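/* Illustrative only: MODE_INDEX is typically used to pick the per-mode
   slot out of one of the cost arrays in struct processor_costs, roughly

     ix86_cost->mult_init[MODE_INDEX (mode)]

   so QImode..DImode select slots 0..3 and every other mode falls back
   to slot 4.  */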
121
122 /* Set by -mtune. */
123 const struct processor_costs *ix86_tune_cost = NULL;
124
125 /* Set by -mtune or -Os. */
126 const struct processor_costs *ix86_cost = NULL;
127
128 /* Processor feature/optimization bitmasks. */
129 #define m_386 (1U<<PROCESSOR_I386)
130 #define m_486 (1U<<PROCESSOR_I486)
131 #define m_PENT (1U<<PROCESSOR_PENTIUM)
132 #define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
133 #define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
134 #define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
135 #define m_NOCONA (1U<<PROCESSOR_NOCONA)
136 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
137 #define m_CORE2 (1U<<PROCESSOR_CORE2)
138 #define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
139 #define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
140 #define m_HASWELL (1U<<PROCESSOR_HASWELL)
141 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
142 #define m_BONNELL (1U<<PROCESSOR_BONNELL)
143 #define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
144 #define m_KNL (1U<<PROCESSOR_KNL)
145 #define m_KNM (1U<<PROCESSOR_KNM)
146 #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
147 #define m_CANNONLAKE (1U<<PROCESSOR_CANNONLAKE)
148 #define m_INTEL (1U<<PROCESSOR_INTEL)
149
150 #define m_GEODE (1U<<PROCESSOR_GEODE)
151 #define m_K6 (1U<<PROCESSOR_K6)
152 #define m_K6_GEODE (m_K6 | m_GEODE)
153 #define m_K8 (1U<<PROCESSOR_K8)
154 #define m_ATHLON (1U<<PROCESSOR_ATHLON)
155 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
156 #define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
157 #define m_BDVER1 (1U<<PROCESSOR_BDVER1)
158 #define m_BDVER2 (1U<<PROCESSOR_BDVER2)
159 #define m_BDVER3 (1U<<PROCESSOR_BDVER3)
160 #define m_BDVER4 (1U<<PROCESSOR_BDVER4)
161 #define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
162 #define m_BTVER1 (1U<<PROCESSOR_BTVER1)
163 #define m_BTVER2 (1U<<PROCESSOR_BTVER2)
164 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
165 #define m_BTVER (m_BTVER1 | m_BTVER2)
166 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
167 | m_ZNVER1)
168
169 #define m_GENERIC (1U<<PROCESSOR_GENERIC)
170
171 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
172 #undef DEF_TUNE
173 #define DEF_TUNE(tune, name, selector) name,
174 #include "x86-tune.def"
175 #undef DEF_TUNE
176 };
177
178 /* Feature tests against the various tunings. */
179 unsigned char ix86_tune_features[X86_TUNE_LAST];
180
181 /* Feature tests against the various tunings used to create ix86_tune_features
182 based on the processor mask. */
183 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
184 #undef DEF_TUNE
185 #define DEF_TUNE(tune, name, selector) selector,
186 #include "x86-tune.def"
187 #undef DEF_TUNE
188 };
189
190 /* Feature tests against the various architecture variations. */
191 unsigned char ix86_arch_features[X86_ARCH_LAST];
192
193 /* Feature tests against the various architecture variations, used to create
194 ix86_arch_features based on the processor mask. */
195 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
196 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
197 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
198
199 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
200 ~m_386,
201
202 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
203 ~(m_386 | m_486),
204
205 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
206 ~m_386,
207
208 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
209 ~m_386,
210 };
211
 212 /* If the average insn count for a single function invocation is
213 lower than this constant, emit fast (but longer) prologue and
214 epilogue code. */
215 #define FAST_PROLOGUE_INSN_COUNT 20
216
 217 /* Names for the low 8-bit, high 8-bit, and 16-bit registers, respectively. */
218 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
219 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
220 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
221
222 /* Array of the smallest class containing reg number REGNO, indexed by
223 REGNO. Used by REGNO_REG_CLASS in i386.h. */
224
225 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
226 {
227 /* ax, dx, cx, bx */
228 AREG, DREG, CREG, BREG,
229 /* si, di, bp, sp */
230 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
231 /* FP registers */
232 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
233 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
234 /* arg pointer */
235 NON_Q_REGS,
236 /* flags, fpsr, fpcr, frame */
237 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
238 /* SSE registers */
239 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
240 SSE_REGS, SSE_REGS,
241 /* MMX registers */
242 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
243 MMX_REGS, MMX_REGS,
244 /* REX registers */
245 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
246 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
247 /* SSE REX registers */
248 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
249 SSE_REGS, SSE_REGS,
250 /* AVX-512 SSE registers */
251 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
252 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
253 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
254 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
255 /* Mask registers. */
256 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
257 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
258 /* MPX bound registers */
259 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
260 };
261
262 /* The "default" register map used in 32bit mode. */
263
264 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
265 {
266 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
267 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
268 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
269 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
270 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
271 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
272 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
273 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
274 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
275 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
276 101, 102, 103, 104, /* bound registers */
277 };
278
279 /* The "default" register map used in 64bit mode. */
280
281 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
282 {
283 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
284 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
285 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
286 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
287 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
288 8,9,10,11,12,13,14,15, /* extended integer registers */
289 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
290 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
291 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
292 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
293 126, 127, 128, 129, /* bound registers */
294 };
295
296 /* Define the register numbers to be used in Dwarf debugging information.
297 The SVR4 reference port C compiler uses the following register numbers
298 in its Dwarf output code:
299 0 for %eax (gcc regno = 0)
300 1 for %ecx (gcc regno = 2)
301 2 for %edx (gcc regno = 1)
302 3 for %ebx (gcc regno = 3)
303 4 for %esp (gcc regno = 7)
304 5 for %ebp (gcc regno = 6)
305 6 for %esi (gcc regno = 4)
306 7 for %edi (gcc regno = 5)
307 The following three DWARF register numbers are never generated by
308 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
 309 believed these numbers to have these meanings.
310 8 for %eip (no gcc equivalent)
311 9 for %eflags (gcc regno = 17)
312 10 for %trapno (no gcc equivalent)
313 It is not at all clear how we should number the FP stack registers
314 for the x86 architecture. If the version of SDB on x86/svr4 were
315 a bit less brain dead with respect to floating-point then we would
316 have a precedent to follow with respect to DWARF register numbers
317 for x86 FP registers, but the SDB on x86/svr4 was so completely
318 broken with respect to FP registers that it is hardly worth thinking
319 of it as something to strive for compatibility with.
320 The version of x86/svr4 SDB I had does (partially)
321 seem to believe that DWARF register number 11 is associated with
322 the x86 register %st(0), but that's about all. Higher DWARF
323 register numbers don't seem to be associated with anything in
324 particular, and even for DWARF regno 11, SDB only seemed to under-
325 stand that it should say that a variable lives in %st(0) (when
326 asked via an `=' command) if we said it was in DWARF regno 11,
327 but SDB still printed garbage when asked for the value of the
328 variable in question (via a `/' command).
329 (Also note that the labels SDB printed for various FP stack regs
330 when doing an `x' command were all wrong.)
331 Note that these problems generally don't affect the native SVR4
332 C compiler because it doesn't allow the use of -O with -g and
333 because when it is *not* optimizing, it allocates a memory
334 location for each floating-point variable, and the memory
335 location is what gets described in the DWARF AT_location
336 attribute for the variable in question.
337 Regardless of the severe mental illness of the x86/svr4 SDB, we
338 do something sensible here and we use the following DWARF
339 register numbers. Note that these are all stack-top-relative
340 numbers.
341 11 for %st(0) (gcc regno = 8)
342 12 for %st(1) (gcc regno = 9)
343 13 for %st(2) (gcc regno = 10)
344 14 for %st(3) (gcc regno = 11)
345 15 for %st(4) (gcc regno = 12)
346 16 for %st(5) (gcc regno = 13)
347 17 for %st(6) (gcc regno = 14)
348 18 for %st(7) (gcc regno = 15)
349 */
350 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
351 {
352 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
353 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
354 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
355 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
356 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
357 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
359 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
360 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
361 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
362 101, 102, 103, 104, /* bound registers */
363 };
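/* Reading the table above as an example: GCC's %ebp is hard register 6
   and svr4_dbx_register_map[6] == 5, matching the SVR4/DWARF numbering
   in the comment (DWARF register 5 is %ebp); likewise %esp (GCC regno 7)
   maps to DWARF register 4.  Entries of -1 mean no DWARF number is
   assigned for that register.  */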
364
365 /* Define parameter passing and return registers. */
366
367 static int const x86_64_int_parameter_registers[6] =
368 {
369 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
370 };
371
372 static int const x86_64_ms_abi_int_parameter_registers[4] =
373 {
374 CX_REG, DX_REG, R8_REG, R9_REG
375 };
376
377 static int const x86_64_int_return_registers[4] =
378 {
379 AX_REG, DX_REG, DI_REG, SI_REG
380 };
381
382 /* Additional registers that are clobbered by SYSV calls. */
383
384 #define NUM_X86_64_MS_CLOBBERED_REGS 12
385 static int const x86_64_ms_sysv_extra_clobbered_registers
386 [NUM_X86_64_MS_CLOBBERED_REGS] =
387 {
388 SI_REG, DI_REG,
389 XMM6_REG, XMM7_REG,
390 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
391 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
392 };
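/* Equivalently: RSI, RDI and XMM6-XMM15 are callee-saved under the
   Microsoft x64 ABI but call-clobbered under the System V ABI, so an
   ms_abi function must assume a sysv_abi callee clobbers them; the
   out-of-line xlogue stubs declared below are used to save and restore
   this set (plus any extra registers requested).  */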
393
394 enum xlogue_stub {
395 XLOGUE_STUB_SAVE,
396 XLOGUE_STUB_RESTORE,
397 XLOGUE_STUB_RESTORE_TAIL,
398 XLOGUE_STUB_SAVE_HFP,
399 XLOGUE_STUB_RESTORE_HFP,
400 XLOGUE_STUB_RESTORE_HFP_TAIL,
401
402 XLOGUE_STUB_COUNT
403 };
404
405 enum xlogue_stub_sets {
406 XLOGUE_SET_ALIGNED,
407 XLOGUE_SET_ALIGNED_PLUS_8,
408 XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
409 XLOGUE_SET_HFP_ALIGNED_PLUS_8,
410
411 XLOGUE_SET_COUNT
412 };
413
414 /* Register save/restore layout used by out-of-line stubs. */
415 class xlogue_layout {
416 public:
417 struct reginfo
418 {
419 unsigned regno;
420 HOST_WIDE_INT offset; /* Offset used by stub base pointer (rax or
421 rsi) to where each register is stored. */
422 };
423
424 unsigned get_nregs () const {return m_nregs;}
425 HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;}
426
427 const reginfo &get_reginfo (unsigned reg) const
428 {
429 gcc_assert (reg < m_nregs);
430 return m_regs[reg];
431 }
432
433 static const char *get_stub_name (enum xlogue_stub stub,
434 unsigned n_extra_args);
435
436 /* Returns an rtx for the stub's symbol based upon
437 1.) the specified stub (save, restore or restore_ret) and
438 2.) the value of cfun->machine->call_ms2sysv_extra_regs and
 439 3.) whether or not stack alignment is being performed. */
440 static rtx get_stub_rtx (enum xlogue_stub stub);
441
442 /* Returns the amount of stack space (including padding) that the stub
443 needs to store registers based upon data in the machine_function. */
444 HOST_WIDE_INT get_stack_space_used () const
445 {
446 const struct machine_function *m = cfun->machine;
447 unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;
448
449 gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
450 return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
451 }
452
453 /* Returns the offset for the base pointer used by the stub. */
454 HOST_WIDE_INT get_stub_ptr_offset () const
455 {
456 return STUB_INDEX_OFFSET + m_stack_align_off_in;
457 }
458
459 static const struct xlogue_layout &get_instance ();
460 static unsigned count_stub_managed_regs ();
461 static bool is_stub_managed_reg (unsigned regno, unsigned count);
462
463 static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
464 static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
465 static const unsigned MAX_REGS = 18;
466 static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
467 static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
468 static const unsigned STUB_NAME_MAX_LEN = 20;
469 static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
470 static const unsigned REG_ORDER[MAX_REGS];
471 static const unsigned REG_ORDER_REALIGN[MAX_REGS];
472
473 private:
474 xlogue_layout ();
475 xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
476 xlogue_layout (const xlogue_layout &);
477
478 /* True if hard frame pointer is used. */
479 bool m_hfp;
480
 481 /* Max number of registers this layout manages. */
482 unsigned m_nregs;
483
484 /* Incoming offset from 16-byte alignment. */
485 HOST_WIDE_INT m_stack_align_off_in;
486
487 /* Register order and offsets. */
488 struct reginfo m_regs[MAX_REGS];
489
490 /* Lazy-inited cache of symbol names for stubs. */
491 static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
492 [STUB_NAME_MAX_LEN];
493
494 static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
495 };
496
497 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
498 "savms64",
499 "resms64",
500 "resms64x",
501 "savms64f",
502 "resms64f",
503 "resms64fx"
504 };
505
506 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
507 /* The below offset values are where each register is stored for the layout
508 relative to incoming stack pointer. The value of each m_regs[].offset will
509 be relative to the incoming base pointer (rax or rsi) used by the stub.
510
511 s_instances: 0 1 2 3
512 Offset: realigned or aligned + 8
513 Register aligned aligned + 8 aligned w/HFP w/HFP */
514 XMM15_REG, /* 0x10 0x18 0x10 0x18 */
515 XMM14_REG, /* 0x20 0x28 0x20 0x28 */
516 XMM13_REG, /* 0x30 0x38 0x30 0x38 */
517 XMM12_REG, /* 0x40 0x48 0x40 0x48 */
518 XMM11_REG, /* 0x50 0x58 0x50 0x58 */
519 XMM10_REG, /* 0x60 0x68 0x60 0x68 */
520 XMM9_REG, /* 0x70 0x78 0x70 0x78 */
521 XMM8_REG, /* 0x80 0x88 0x80 0x88 */
522 XMM7_REG, /* 0x90 0x98 0x90 0x98 */
523 XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */
524 SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */
525 DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */
526 BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */
527 BP_REG, /* 0xc0 0xc8 N/A N/A */
528 R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */
529 R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */
530 R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */
531 R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */
532 };
533
534 /* Instantiate static const values. */
535 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
536 const unsigned xlogue_layout::MIN_REGS;
537 const unsigned xlogue_layout::MAX_REGS;
538 const unsigned xlogue_layout::MAX_EXTRA_REGS;
539 const unsigned xlogue_layout::VARIANT_COUNT;
540 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
541
542 /* Initialize xlogue_layout::s_stub_names to zero. */
543 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
544 [STUB_NAME_MAX_LEN];
545
546 /* Instantiates all xlogue_layout instances. */
547 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
548 xlogue_layout (0, false),
549 xlogue_layout (8, false),
550 xlogue_layout (0, true),
551 xlogue_layout (8, true)
552 };
553
554 /* Return an appropriate const instance of xlogue_layout based upon values
555 in cfun->machine and crtl. */
556 const struct xlogue_layout &
557 xlogue_layout::get_instance ()
558 {
559 enum xlogue_stub_sets stub_set;
560 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
561
562 if (stack_realign_fp)
563 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
564 else if (frame_pointer_needed)
565 stub_set = aligned_plus_8
566 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
567 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
568 else
569 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
570
571 return s_instances[stub_set];
572 }
573
574 /* Determine how many clobbered registers can be saved by the stub.
575 Returns the count of registers the stub will save and restore. */
576 unsigned
577 xlogue_layout::count_stub_managed_regs ()
578 {
579 bool hfp = frame_pointer_needed || stack_realign_fp;
580 unsigned i, count;
581 unsigned regno;
582
583 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
584 {
585 regno = REG_ORDER[i];
586 if (regno == BP_REG && hfp)
587 continue;
588 if (!ix86_save_reg (regno, false, false))
589 break;
590 ++count;
591 }
592 return count;
593 }
594
595 /* Determine if register REGNO is a stub managed register given the
596 total COUNT of stub managed registers. */
597 bool
598 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
599 {
600 bool hfp = frame_pointer_needed || stack_realign_fp;
601 unsigned i;
602
603 for (i = 0; i < count; ++i)
604 {
605 gcc_assert (i < MAX_REGS);
606 if (REG_ORDER[i] == BP_REG && hfp)
607 ++count;
608 else if (REG_ORDER[i] == regno)
609 return true;
610 }
611 return false;
612 }
613
614 /* Constructor for xlogue_layout. */
615 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
616 : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
617 m_stack_align_off_in (stack_align_off_in)
618 {
619 HOST_WIDE_INT offset = stack_align_off_in;
620 unsigned i, j;
621
622 for (i = j = 0; i < MAX_REGS; ++i)
623 {
624 unsigned regno = REG_ORDER[i];
625
626 if (regno == BP_REG && hfp)
627 continue;
628 if (SSE_REGNO_P (regno))
629 {
630 offset += 16;
631 /* Verify that SSE regs are always aligned. */
632 gcc_assert (!((stack_align_off_in + offset) & 15));
633 }
634 else
635 offset += 8;
636
637 m_regs[j].regno = regno;
638 m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
639 }
640 gcc_assert (j == m_nregs);
641 }
642
643 const char *
644 xlogue_layout::get_stub_name (enum xlogue_stub stub,
645 unsigned n_extra_regs)
646 {
647 const int have_avx = TARGET_AVX;
648 char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
649
650 /* Lazy init */
651 if (!*name)
652 {
653 int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
654 (have_avx ? "avx" : "sse"),
655 STUB_BASE_NAMES[stub],
656 MIN_REGS + n_extra_regs);
657 gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
658 }
659
660 return name;
661 }
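/* For example, given the tables above this yields names such as
   "__sse_savms64_12" (save stub, no extra registers, no AVX) or
   "__avx_resms64x_14" (tail-call restore stub, two extra registers,
   AVX enabled); MIN_REGS is 12, so the trailing number is the total
   register count handled by the stub.  */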
662
663 /* Return rtx of a symbol ref for the entry point (based upon
664 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
665 rtx
666 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
667 {
668 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
669 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
670 gcc_assert (stub < XLOGUE_STUB_COUNT);
671 gcc_assert (crtl->stack_realign_finalized);
672
673 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
674 }
675
676 /* Define the structure for the machine field in struct function. */
677
678 struct GTY(()) stack_local_entry {
679 unsigned short mode;
680 unsigned short n;
681 rtx rtl;
682 struct stack_local_entry *next;
683 };
684
 685 /* Which CPU we are scheduling for. */
686 enum attr_cpu ix86_schedule;
687
 688 /* Which CPU we are optimizing for. */
689 enum processor_type ix86_tune;
690
691 /* Which instruction set architecture to use. */
692 enum processor_type ix86_arch;
693
694 /* True if processor has SSE prefetch instruction. */
695 unsigned char x86_prefetch_sse;
696
697 /* -mstackrealign option */
698 static const char ix86_force_align_arg_pointer_string[]
699 = "force_align_arg_pointer";
700
701 static rtx (*ix86_gen_leave) (void);
702 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
703 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
704 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
705 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
706 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
707 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
708 static rtx (*ix86_gen_clzero) (rtx);
709 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
710 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
711 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
712 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
713 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
714 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
715
716 /* Preferred alignment for stack boundary in bits. */
717 unsigned int ix86_preferred_stack_boundary;
718
719 /* Alignment for incoming stack boundary in bits specified at
720 command line. */
721 static unsigned int ix86_user_incoming_stack_boundary;
722
723 /* Default alignment for incoming stack boundary in bits. */
724 static unsigned int ix86_default_incoming_stack_boundary;
725
726 /* Alignment for incoming stack boundary in bits. */
727 unsigned int ix86_incoming_stack_boundary;
728
729 /* Calling abi specific va_list type nodes. */
730 static GTY(()) tree sysv_va_list_type_node;
731 static GTY(()) tree ms_va_list_type_node;
732
733 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
734 char internal_label_prefix[16];
735 int internal_label_prefix_len;
736
737 /* Fence to use after loop using movnt. */
738 tree x86_mfence;
739
 740 /* Register class used for passing a given 64-bit part of the argument.
 741 These represent the classes documented by the psABI, with the exception of
 742 the SSESF and SSEDF classes, which are basically the SSE class; GCC just
 743 uses an SF- or DFmode move instead of a DImode move to avoid reformatting penalties.
 744
 745 Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
 746 whenever possible (the upper half does contain padding). */
747 enum x86_64_reg_class
748 {
749 X86_64_NO_CLASS,
750 X86_64_INTEGER_CLASS,
751 X86_64_INTEGERSI_CLASS,
752 X86_64_SSE_CLASS,
753 X86_64_SSESF_CLASS,
754 X86_64_SSEDF_CLASS,
755 X86_64_SSEUP_CLASS,
756 X86_64_X87_CLASS,
757 X86_64_X87UP_CLASS,
758 X86_64_COMPLEX_X87_CLASS,
759 X86_64_MEMORY_CLASS
760 };
761
762 #define MAX_CLASSES 8
763
 764 /* Table of constants used by fldpi, fldln2, etc. */
765 static REAL_VALUE_TYPE ext_80387_constants_table [5];
766 static bool ext_80387_constants_init;
767
768 \f
769 static struct machine_function * ix86_init_machine_status (void);
770 static rtx ix86_function_value (const_tree, const_tree, bool);
771 static bool ix86_function_value_regno_p (const unsigned int);
772 static unsigned int ix86_function_arg_boundary (machine_mode,
773 const_tree);
774 static rtx ix86_static_chain (const_tree, bool);
775 static int ix86_function_regparm (const_tree, const_tree);
776 static void ix86_compute_frame_layout (void);
777 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
778 rtx, rtx, int);
779 static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
780 static tree ix86_canonical_va_list_type (tree);
781 static void predict_jump (int);
782 static unsigned int split_stack_prologue_scratch_regno (void);
783 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
784
785 enum ix86_function_specific_strings
786 {
787 IX86_FUNCTION_SPECIFIC_ARCH,
788 IX86_FUNCTION_SPECIFIC_TUNE,
789 IX86_FUNCTION_SPECIFIC_MAX
790 };
791
792 static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
793 const char *, const char *, enum fpmath_unit,
794 bool);
795 static void ix86_function_specific_save (struct cl_target_option *,
796 struct gcc_options *opts);
797 static void ix86_function_specific_restore (struct gcc_options *opts,
798 struct cl_target_option *);
799 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
800 static void ix86_function_specific_print (FILE *, int,
801 struct cl_target_option *);
802 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
803 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
804 struct gcc_options *,
805 struct gcc_options *,
806 struct gcc_options *);
807 static bool ix86_can_inline_p (tree, tree);
808 static void ix86_set_current_function (tree);
809 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
810
811 static enum calling_abi ix86_function_abi (const_tree);
812
813 \f
814 #ifndef SUBTARGET32_DEFAULT_CPU
815 #define SUBTARGET32_DEFAULT_CPU "i386"
816 #endif
817
818 /* Whether -mtune= or -march= were specified */
819 static int ix86_tune_defaulted;
820 static int ix86_arch_specified;
821
822 /* Vectorization library interface and handlers. */
823 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
824
825 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
826 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
827
828 /* Processor target table, indexed by processor number */
829 struct ptt
830 {
831 const char *const name; /* processor name */
832 const struct processor_costs *cost; /* Processor costs */
833 const int align_loop; /* Default alignments. */
834 const int align_loop_max_skip;
835 const int align_jump;
836 const int align_jump_max_skip;
837 const int align_func;
838 };
839
840 /* This table must be in sync with enum processor_type in i386.h. */
841 static const struct ptt processor_target_table[PROCESSOR_max] =
842 {
843 {"generic", &generic_cost, 16, 10, 16, 10, 16},
844 {"i386", &i386_cost, 4, 3, 4, 3, 4},
845 {"i486", &i486_cost, 16, 15, 16, 15, 16},
846 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
847 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
848 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
849 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
850 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
851 {"core2", &core_cost, 16, 10, 16, 10, 16},
852 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
853 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
854 {"haswell", &core_cost, 16, 10, 16, 10, 16},
855 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
856 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
857 {"knl", &slm_cost, 16, 15, 16, 7, 16},
858 {"knm", &slm_cost, 16, 15, 16, 7, 16},
859 {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
860 {"cannonlake", &core_cost, 16, 10, 16, 10, 16},
861 {"intel", &intel_cost, 16, 15, 16, 7, 16},
862 {"geode", &geode_cost, 0, 0, 0, 0, 0},
863 {"k6", &k6_cost, 32, 7, 32, 7, 32},
864 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
865 {"k8", &k8_cost, 16, 7, 16, 7, 16},
866 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
867 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
868 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
869 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
870 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
871 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
872 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
873 {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
874 };
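/* Reading a row as an example: for "generic", loops and jumps are
   aligned to 16 bytes with at most 10 bytes of padding skipped, and
   functions are aligned to 16 bytes; roughly, these values supply the
   -falign-loops/-falign-jumps/-falign-functions defaults when the user
   does not specify them.  */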
875 \f
876 static unsigned int
877 rest_of_handle_insert_vzeroupper (void)
878 {
879 int i;
880
881 /* vzeroupper instructions are inserted immediately after reload to
882 account for possible spills from 256bit or 512bit registers. The pass
 883 reuses the mode switching infrastructure by re-running the mode insertion
884 pass, so disable entities that have already been processed. */
885 for (i = 0; i < MAX_386_ENTITIES; i++)
886 ix86_optimize_mode_switching[i] = 0;
887
888 ix86_optimize_mode_switching[AVX_U128] = 1;
889
890 /* Call optimize_mode_switching. */
891 g->get_passes ()->execute_pass_mode_switching ();
892 return 0;
893 }
894
 895 /* Return true if INSN uses or defines a hard register.
896 Hard register uses in a memory address are ignored.
897 Clobbers and flags definitions are ignored. */
898
899 static bool
900 has_non_address_hard_reg (rtx_insn *insn)
901 {
902 df_ref ref;
903 FOR_EACH_INSN_DEF (ref, insn)
904 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
905 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
906 && DF_REF_REGNO (ref) != FLAGS_REG)
907 return true;
908
909 FOR_EACH_INSN_USE (ref, insn)
910 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
911 return true;
912
913 return false;
914 }
915
916 /* Check if comparison INSN may be transformed
 917 into a vector comparison. Currently we transform
 918 only zero checks, which look like:
919
920 (set (reg:CCZ 17 flags)
921 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
922 (subreg:SI (reg:DI x) 0))
923 (const_int 0 [0]))) */
924
925 static bool
926 convertible_comparison_p (rtx_insn *insn)
927 {
928 if (!TARGET_SSE4_1)
929 return false;
930
931 rtx def_set = single_set (insn);
932
933 gcc_assert (def_set);
934
935 rtx src = SET_SRC (def_set);
936 rtx dst = SET_DEST (def_set);
937
938 gcc_assert (GET_CODE (src) == COMPARE);
939
940 if (GET_CODE (dst) != REG
941 || REGNO (dst) != FLAGS_REG
942 || GET_MODE (dst) != CCZmode)
943 return false;
944
945 rtx op1 = XEXP (src, 0);
946 rtx op2 = XEXP (src, 1);
947
948 if (op2 != CONST0_RTX (GET_MODE (op2)))
949 return false;
950
951 if (GET_CODE (op1) != IOR)
952 return false;
953
954 op2 = XEXP (op1, 1);
955 op1 = XEXP (op1, 0);
956
957 if (!SUBREG_P (op1)
958 || !SUBREG_P (op2)
959 || GET_MODE (op1) != SImode
960 || GET_MODE (op2) != SImode
961 || ((SUBREG_BYTE (op1) != 0
962 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
963 && (SUBREG_BYTE (op2) != 0
964 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
965 return false;
966
967 op1 = SUBREG_REG (op1);
968 op2 = SUBREG_REG (op2);
969
970 if (op1 != op2
971 || !REG_P (op1)
972 || GET_MODE (op1) != DImode)
973 return false;
974
975 return true;
976 }
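/* For illustration: on a 32-bit target a zero test such as

     long long x;  ...  if (x == 0) ...

   is expanded as an IOR of the two SImode halves of X feeding a
   CCZmode compare against zero, which is the shape matched above;
   with SSE4.1 the chain conversion can later turn it into a
   ptest-style vector comparison.  */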
977
978 /* The DImode version of scalar_to_vector_candidate_p. */
979
980 static bool
981 dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
982 {
983 rtx def_set = single_set (insn);
984
985 if (!def_set)
986 return false;
987
988 if (has_non_address_hard_reg (insn))
989 return false;
990
991 rtx src = SET_SRC (def_set);
992 rtx dst = SET_DEST (def_set);
993
994 if (GET_CODE (src) == COMPARE)
995 return convertible_comparison_p (insn);
996
997 /* We are interested in DImode promotion only. */
998 if ((GET_MODE (src) != DImode
999 && !CONST_INT_P (src))
1000 || GET_MODE (dst) != DImode)
1001 return false;
1002
1003 if (!REG_P (dst) && !MEM_P (dst))
1004 return false;
1005
1006 switch (GET_CODE (src))
1007 {
1008 case ASHIFTRT:
1009 if (!TARGET_AVX512VL)
1010 return false;
1011 /* FALLTHRU */
1012
1013 case ASHIFT:
1014 case LSHIFTRT:
1015 if (!REG_P (XEXP (src, 1))
1016 && (!SUBREG_P (XEXP (src, 1))
1017 || SUBREG_BYTE (XEXP (src, 1)) != 0
1018 || !REG_P (SUBREG_REG (XEXP (src, 1))))
1019 && (!CONST_INT_P (XEXP (src, 1))
1020 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
1021 return false;
1022
1023 if (GET_MODE (XEXP (src, 1)) != QImode
1024 && !CONST_INT_P (XEXP (src, 1)))
1025 return false;
1026 break;
1027
1028 case PLUS:
1029 case MINUS:
1030 case IOR:
1031 case XOR:
1032 case AND:
1033 if (!REG_P (XEXP (src, 1))
1034 && !MEM_P (XEXP (src, 1))
1035 && !CONST_INT_P (XEXP (src, 1)))
1036 return false;
1037
1038 if (GET_MODE (XEXP (src, 1)) != DImode
1039 && !CONST_INT_P (XEXP (src, 1)))
1040 return false;
1041 break;
1042
1043 case NEG:
1044 case NOT:
1045 break;
1046
1047 case REG:
1048 return true;
1049
1050 case MEM:
1051 case CONST_INT:
1052 return REG_P (dst);
1053
1054 default:
1055 return false;
1056 }
1057
1058 if (!REG_P (XEXP (src, 0))
1059 && !MEM_P (XEXP (src, 0))
1060 && !CONST_INT_P (XEXP (src, 0))
1061 /* Check for andnot case. */
1062 && (GET_CODE (src) != AND
1063 || GET_CODE (XEXP (src, 0)) != NOT
1064 || !REG_P (XEXP (XEXP (src, 0), 0))))
1065 return false;
1066
1067 if (GET_MODE (XEXP (src, 0)) != DImode
1068 && !CONST_INT_P (XEXP (src, 0)))
1069 return false;
1070
1071 return true;
1072 }
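/* For illustration: on a 32-bit target a simple DImode operation such as

     unsigned long long a, b;  ...  a = a + b;

   normally needs an add/adc pair of GPR instructions; when it is
   selected as a candidate here, the chain conversion below may turn it
   into a single V2DImode paddq on an SSE register instead.  */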
1073
1074 /* The TImode version of scalar_to_vector_candidate_p. */
1075
1076 static bool
1077 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1078 {
1079 rtx def_set = single_set (insn);
1080
1081 if (!def_set)
1082 return false;
1083
1084 if (has_non_address_hard_reg (insn))
1085 return false;
1086
1087 rtx src = SET_SRC (def_set);
1088 rtx dst = SET_DEST (def_set);
1089
 1090 /* Only TImode loads and stores are allowed. */
1091 if (GET_MODE (dst) != TImode)
1092 return false;
1093
1094 if (MEM_P (dst))
1095 {
 1096 /* Check for a store. Memory must be aligned, or an unaligned store
 1097 must be optimal. Only support stores from a register, a standard SSE
 1098 constant, or a CONST_WIDE_INT generated from a piecewise store.
1099
1100 ??? Verify performance impact before enabling CONST_INT for
1101 __int128 store. */
1102 if (misaligned_operand (dst, TImode)
1103 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1104 return false;
1105
1106 switch (GET_CODE (src))
1107 {
1108 default:
1109 return false;
1110
1111 case REG:
1112 case CONST_WIDE_INT:
1113 return true;
1114
1115 case CONST_INT:
1116 return standard_sse_constant_p (src, TImode);
1117 }
1118 }
1119 else if (MEM_P (src))
1120 {
 1121 /* Check for a load. Memory must be aligned, or an unaligned load
 1122 must be optimal. */
1123 return (REG_P (dst)
1124 && (!misaligned_operand (src, TImode)
1125 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1126 }
1127
1128 return false;
1129 }
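/* In other words, the only TImode candidates are plain 128-bit copies,
   e.g. the load/store pair generated for

     __int128 *p, *q;  ...  *p = *q;

   each of which can become a single V1TImode (movdqa/movdqu) move
   instead of a pair of 64-bit integer moves.  */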
1130
 1131 /* Return true if INSN may be converted into a vector
1132 instruction. */
1133
1134 static bool
1135 scalar_to_vector_candidate_p (rtx_insn *insn)
1136 {
1137 if (TARGET_64BIT)
1138 return timode_scalar_to_vector_candidate_p (insn);
1139 else
1140 return dimode_scalar_to_vector_candidate_p (insn);
1141 }
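/* Note the dispatch above: the DImode flavour of the STV pass is only
   used for 32-bit code, where 64-bit scalar arithmetic needs register
   pairs, while the TImode flavour is only used for 64-bit code, where
   __int128 moves would otherwise occupy two general registers.  */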
1142
1143 /* The DImode version of remove_non_convertible_regs. */
1144
1145 static void
1146 dimode_remove_non_convertible_regs (bitmap candidates)
1147 {
1148 bitmap_iterator bi;
1149 unsigned id;
1150 bitmap regs = BITMAP_ALLOC (NULL);
1151
1152 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1153 {
1154 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1155 rtx reg = SET_DEST (def_set);
1156
1157 if (!REG_P (reg)
1158 || bitmap_bit_p (regs, REGNO (reg))
1159 || HARD_REGISTER_P (reg))
1160 continue;
1161
1162 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
1163 def;
1164 def = DF_REF_NEXT_REG (def))
1165 {
1166 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1167 {
1168 if (dump_file)
1169 fprintf (dump_file,
1170 "r%d has non convertible definition in insn %d\n",
1171 REGNO (reg), DF_REF_INSN_UID (def));
1172
1173 bitmap_set_bit (regs, REGNO (reg));
1174 break;
1175 }
1176 }
1177 }
1178
1179 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1180 {
1181 for (df_ref def = DF_REG_DEF_CHAIN (id);
1182 def;
1183 def = DF_REF_NEXT_REG (def))
1184 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1185 {
1186 if (dump_file)
1187 fprintf (dump_file, "Removing insn %d from candidates list\n",
1188 DF_REF_INSN_UID (def));
1189
1190 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1191 }
1192 }
1193
1194 BITMAP_FREE (regs);
1195 }
1196
1197 /* For a register REGNO, scan instructions for its defs and uses.
1198 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1199
1200 static void
1201 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1202 unsigned int regno)
1203 {
1204 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1205 def;
1206 def = DF_REF_NEXT_REG (def))
1207 {
1208 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1209 {
1210 if (dump_file)
1211 fprintf (dump_file,
1212 "r%d has non convertible def in insn %d\n",
1213 regno, DF_REF_INSN_UID (def));
1214
1215 bitmap_set_bit (regs, regno);
1216 break;
1217 }
1218 }
1219
1220 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1221 ref;
1222 ref = DF_REF_NEXT_REG (ref))
1223 {
1224 /* Debug instructions are skipped. */
1225 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1226 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1227 {
1228 if (dump_file)
1229 fprintf (dump_file,
1230 "r%d has non convertible use in insn %d\n",
1231 regno, DF_REF_INSN_UID (ref));
1232
1233 bitmap_set_bit (regs, regno);
1234 break;
1235 }
1236 }
1237 }
1238
1239 /* The TImode version of remove_non_convertible_regs. */
1240
1241 static void
1242 timode_remove_non_convertible_regs (bitmap candidates)
1243 {
1244 bitmap_iterator bi;
1245 unsigned id;
1246 bitmap regs = BITMAP_ALLOC (NULL);
1247
1248 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1249 {
1250 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1251 rtx dest = SET_DEST (def_set);
1252 rtx src = SET_SRC (def_set);
1253
1254 if ((!REG_P (dest)
1255 || bitmap_bit_p (regs, REGNO (dest))
1256 || HARD_REGISTER_P (dest))
1257 && (!REG_P (src)
1258 || bitmap_bit_p (regs, REGNO (src))
1259 || HARD_REGISTER_P (src)))
1260 continue;
1261
1262 if (REG_P (dest))
1263 timode_check_non_convertible_regs (candidates, regs,
1264 REGNO (dest));
1265
1266 if (REG_P (src))
1267 timode_check_non_convertible_regs (candidates, regs,
1268 REGNO (src));
1269 }
1270
1271 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1272 {
1273 for (df_ref def = DF_REG_DEF_CHAIN (id);
1274 def;
1275 def = DF_REF_NEXT_REG (def))
1276 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1277 {
1278 if (dump_file)
1279 fprintf (dump_file, "Removing insn %d from candidates list\n",
1280 DF_REF_INSN_UID (def));
1281
1282 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1283 }
1284
1285 for (df_ref ref = DF_REG_USE_CHAIN (id);
1286 ref;
1287 ref = DF_REF_NEXT_REG (ref))
1288 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1289 {
1290 if (dump_file)
1291 fprintf (dump_file, "Removing insn %d from candidates list\n",
1292 DF_REF_INSN_UID (ref));
1293
1294 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1295 }
1296 }
1297
1298 BITMAP_FREE (regs);
1299 }
1300
 1301 /* For a given bitmap of insn UIDs, scan all instructions and
 1302 remove an insn from CANDIDATES if it has both convertible
 1303 and non-convertible definitions.
1304
1305 All insns in a bitmap are conversion candidates according to
1306 scalar_to_vector_candidate_p. Currently it implies all insns
1307 are single_set. */
1308
1309 static void
1310 remove_non_convertible_regs (bitmap candidates)
1311 {
1312 if (TARGET_64BIT)
1313 timode_remove_non_convertible_regs (candidates);
1314 else
1315 dimode_remove_non_convertible_regs (candidates);
1316 }
1317
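/* A rough sketch of how these chain classes are driven by the STV pass
   (see convert_scalars_to_vector later in this file); details elided:

     scalar_chain *chain;
     if (TARGET_64BIT)
       chain = new timode_scalar_chain;
     else
       chain = new dimode_scalar_chain;
     chain->build (candidates, insn_uid);
     if (chain->compute_convert_gain () > 0)
       chain->convert ();
     delete chain;
*/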
1318 class scalar_chain
1319 {
1320 public:
1321 scalar_chain ();
1322 virtual ~scalar_chain ();
1323
1324 static unsigned max_id;
1325
1326 /* ID of a chain. */
1327 unsigned int chain_id;
1328 /* A queue of instructions to be included into a chain. */
1329 bitmap queue;
1330 /* Instructions included into a chain. */
1331 bitmap insns;
1332 /* All registers defined by a chain. */
1333 bitmap defs;
 1334 /* Registers used in both vector and scalar modes. */
1335 bitmap defs_conv;
1336
1337 void build (bitmap candidates, unsigned insn_uid);
1338 virtual int compute_convert_gain () = 0;
1339 int convert ();
1340
1341 protected:
1342 void add_to_queue (unsigned insn_uid);
1343 void emit_conversion_insns (rtx insns, rtx_insn *pos);
1344
1345 private:
1346 void add_insn (bitmap candidates, unsigned insn_uid);
1347 void analyze_register_chain (bitmap candidates, df_ref ref);
1348 virtual void mark_dual_mode_def (df_ref def) = 0;
1349 virtual void convert_insn (rtx_insn *insn) = 0;
1350 virtual void convert_registers () = 0;
1351 };
1352
1353 class dimode_scalar_chain : public scalar_chain
1354 {
1355 public:
1356 int compute_convert_gain ();
1357 private:
1358 void mark_dual_mode_def (df_ref def);
1359 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
1360 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
1361 void convert_insn (rtx_insn *insn);
1362 void convert_op (rtx *op, rtx_insn *insn);
1363 void convert_reg (unsigned regno);
1364 void make_vector_copies (unsigned regno);
1365 void convert_registers ();
1366 int vector_const_cost (rtx exp);
1367 };
1368
1369 class timode_scalar_chain : public scalar_chain
1370 {
1371 public:
 1372 /* Converting from TImode to V1TImode is always faster. */
1373 int compute_convert_gain () { return 1; }
1374
1375 private:
1376 void mark_dual_mode_def (df_ref def);
1377 void fix_debug_reg_uses (rtx reg);
1378 void convert_insn (rtx_insn *insn);
 1379 /* We don't convert registers to a different size. */
1380 void convert_registers () {}
1381 };
1382
1383 unsigned scalar_chain::max_id = 0;
1384
1385 /* Initialize new chain. */
1386
1387 scalar_chain::scalar_chain ()
1388 {
1389 chain_id = ++max_id;
1390
1391 if (dump_file)
1392 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
1393
1394 bitmap_obstack_initialize (NULL);
1395 insns = BITMAP_ALLOC (NULL);
1396 defs = BITMAP_ALLOC (NULL);
1397 defs_conv = BITMAP_ALLOC (NULL);
1398 queue = NULL;
1399 }
1400
1401 /* Free chain's data. */
1402
1403 scalar_chain::~scalar_chain ()
1404 {
1405 BITMAP_FREE (insns);
1406 BITMAP_FREE (defs);
1407 BITMAP_FREE (defs_conv);
1408 bitmap_obstack_release (NULL);
1409 }
1410
 1411 /* Add an instruction to the chain's queue. */
1412
1413 void
1414 scalar_chain::add_to_queue (unsigned insn_uid)
1415 {
1416 if (bitmap_bit_p (insns, insn_uid)
1417 || bitmap_bit_p (queue, insn_uid))
1418 return;
1419
1420 if (dump_file)
1421 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
1422 insn_uid, chain_id);
1423 bitmap_set_bit (queue, insn_uid);
1424 }
1425
1426 /* For DImode conversion, mark register defined by DEF as requiring
1427 conversion. */
1428
1429 void
1430 dimode_scalar_chain::mark_dual_mode_def (df_ref def)
1431 {
1432 gcc_assert (DF_REF_REG_DEF_P (def));
1433
1434 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
1435 return;
1436
1437 if (dump_file)
1438 fprintf (dump_file,
1439 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
1440 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
1441
1442 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
1443 }
1444
1445 /* For TImode conversion, it is unused. */
1446
1447 void
1448 timode_scalar_chain::mark_dual_mode_def (df_ref)
1449 {
1450 gcc_unreachable ();
1451 }
1452
1453 /* Check REF's chain to add new insns into a queue
1454 and find registers requiring conversion. */
1455
1456 void
1457 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
1458 {
1459 df_link *chain;
1460
1461 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
1462 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
1463 add_to_queue (DF_REF_INSN_UID (ref));
1464
1465 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
1466 {
1467 unsigned uid = DF_REF_INSN_UID (chain->ref);
1468
1469 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
1470 continue;
1471
1472 if (!DF_REF_REG_MEM_P (chain->ref))
1473 {
1474 if (bitmap_bit_p (insns, uid))
1475 continue;
1476
1477 if (bitmap_bit_p (candidates, uid))
1478 {
1479 add_to_queue (uid);
1480 continue;
1481 }
1482 }
1483
1484 if (DF_REF_REG_DEF_P (chain->ref))
1485 {
1486 if (dump_file)
1487 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
1488 DF_REF_REGNO (chain->ref), uid);
1489 mark_dual_mode_def (chain->ref);
1490 }
1491 else
1492 {
1493 if (dump_file)
1494 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
1495 DF_REF_REGNO (chain->ref), uid);
1496 mark_dual_mode_def (ref);
1497 }
1498 }
1499 }
1500
1501 /* Add instruction into a chain. */
1502
1503 void
1504 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
1505 {
1506 if (bitmap_bit_p (insns, insn_uid))
1507 return;
1508
1509 if (dump_file)
1510 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
1511
1512 bitmap_set_bit (insns, insn_uid);
1513
1514 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1515 rtx def_set = single_set (insn);
1516 if (def_set && REG_P (SET_DEST (def_set))
1517 && !HARD_REGISTER_P (SET_DEST (def_set)))
1518 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
1519
1520 df_ref ref;
1521 df_ref def;
1522 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1523 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
1524 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
1525 def;
1526 def = DF_REF_NEXT_REG (def))
1527 analyze_register_chain (candidates, def);
1528 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1529 if (!DF_REF_REG_MEM_P (ref))
1530 analyze_register_chain (candidates, ref);
1531 }
1532
 1533 /* Build a new chain starting from insn INSN_UID, recursively
 1534 adding all dependent uses and definitions. */
1535
1536 void
1537 scalar_chain::build (bitmap candidates, unsigned insn_uid)
1538 {
1539 queue = BITMAP_ALLOC (NULL);
1540 bitmap_set_bit (queue, insn_uid);
1541
1542 if (dump_file)
1543 fprintf (dump_file, "Building chain #%d...\n", chain_id);
1544
1545 while (!bitmap_empty_p (queue))
1546 {
1547 insn_uid = bitmap_first_set_bit (queue);
1548 bitmap_clear_bit (queue, insn_uid);
1549 bitmap_clear_bit (candidates, insn_uid);
1550 add_insn (candidates, insn_uid);
1551 }
1552
1553 if (dump_file)
1554 {
1555 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
1556 fprintf (dump_file, " insns: ");
1557 dump_bitmap (dump_file, insns);
1558 if (!bitmap_empty_p (defs_conv))
1559 {
1560 bitmap_iterator bi;
1561 unsigned id;
1562 const char *comma = "";
1563 fprintf (dump_file, " defs to convert: ");
1564 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1565 {
1566 fprintf (dump_file, "%sr%d", comma, id);
1567 comma = ", ";
1568 }
1569 fprintf (dump_file, "\n");
1570 }
1571 }
1572
1573 BITMAP_FREE (queue);
1574 }
1575
 1576 /* Return the cost of building a vector constant
1577 instead of using a scalar one. */
1578
1579 int
1580 dimode_scalar_chain::vector_const_cost (rtx exp)
1581 {
1582 gcc_assert (CONST_INT_P (exp));
1583
1584 if (standard_sse_constant_p (exp, V2DImode))
1585 return COSTS_N_INSNS (1);
1586 return ix86_cost->sse_load[1];
1587 }
1588
1589 /* Compute a gain for chain conversion. */
1590
1591 int
1592 dimode_scalar_chain::compute_convert_gain ()
1593 {
1594 bitmap_iterator bi;
1595 unsigned insn_uid;
1596 int gain = 0;
1597 int cost = 0;
1598
1599 if (dump_file)
1600 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
1601
1602 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
1603 {
1604 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1605 rtx def_set = single_set (insn);
1606 rtx src = SET_SRC (def_set);
1607 rtx dst = SET_DEST (def_set);
1608
1609 if (REG_P (src) && REG_P (dst))
1610 gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
1611 else if (REG_P (src) && MEM_P (dst))
1612 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1613 else if (MEM_P (src) && REG_P (dst))
1614 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
1615 else if (GET_CODE (src) == ASHIFT
1616 || GET_CODE (src) == ASHIFTRT
1617 || GET_CODE (src) == LSHIFTRT)
1618 {
1619 if (CONST_INT_P (XEXP (src, 0)))
1620 gain -= vector_const_cost (XEXP (src, 0));
1621 if (CONST_INT_P (XEXP (src, 1)))
1622 {
1623 gain += ix86_cost->shift_const;
1624 if (INTVAL (XEXP (src, 1)) >= 32)
1625 gain -= COSTS_N_INSNS (1);
1626 }
1627 else
1628 /* Additional gain for omitting two CMOVs. */
1629 gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
1630 }
1631 else if (GET_CODE (src) == PLUS
1632 || GET_CODE (src) == MINUS
1633 || GET_CODE (src) == IOR
1634 || GET_CODE (src) == XOR
1635 || GET_CODE (src) == AND)
1636 {
1637 gain += ix86_cost->add;
1638 /* Additional gain for andnot for targets without BMI. */
1639 if (GET_CODE (XEXP (src, 0)) == NOT
1640 && !TARGET_BMI)
1641 gain += 2 * ix86_cost->add;
1642
1643 if (CONST_INT_P (XEXP (src, 0)))
1644 gain -= vector_const_cost (XEXP (src, 0));
1645 if (CONST_INT_P (XEXP (src, 1)))
1646 gain -= vector_const_cost (XEXP (src, 1));
1647 }
1648 else if (GET_CODE (src) == NEG
1649 || GET_CODE (src) == NOT)
1650 gain += ix86_cost->add - COSTS_N_INSNS (1);
1651 else if (GET_CODE (src) == COMPARE)
1652 {
1653 /* Assume comparison cost is the same. */
1654 }
1655 else if (CONST_INT_P (src))
1656 {
1657 if (REG_P (dst))
1658 gain += COSTS_N_INSNS (2);
1659 else if (MEM_P (dst))
1660 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1661 gain -= vector_const_cost (src);
1662 }
1663 else
1664 gcc_unreachable ();
1665 }
1666
1667 if (dump_file)
1668 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
1669
1670 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
1671 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
1672
1673 if (dump_file)
1674 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
1675
1676 gain -= cost;
1677
1678 if (dump_file)
1679 fprintf (dump_file, " Total gain: %d\n", gain);
1680
1681 return gain;
1682 }
1683
1684 /* Replace REG in X with a V2DI subreg of NEW_REG. */
1685
1686 rtx
1687 dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
1688 {
1689 if (x == reg)
1690 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
1691
1692 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
1693 int i, j;
1694 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
1695 {
1696 if (fmt[i] == 'e')
1697 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
1698 else if (fmt[i] == 'E')
1699 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1700 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
1701 reg, new_reg);
1702 }
1703
1704 return x;
1705 }
1706
1707 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
1708
1709 void
1710 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
1711 rtx reg, rtx new_reg)
1712 {
1713 replace_with_subreg (single_set (insn), reg, new_reg);
1714 }
1715
 1716 /* Insert the generated conversion instruction sequence INSNS
 1717 after instruction AFTER. A new BB may be required if the
 1718 instruction has an EH region attached. */
1719
1720 void
1721 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
1722 {
1723 if (!control_flow_insn_p (after))
1724 {
1725 emit_insn_after (insns, after);
1726 return;
1727 }
1728
1729 basic_block bb = BLOCK_FOR_INSN (after);
1730 edge e = find_fallthru_edge (bb->succs);
1731 gcc_assert (e);
1732
1733 basic_block new_bb = split_edge (e);
1734 emit_insn_after (insns, BB_HEAD (new_bb));
1735 }
1736
 1737 /* Make vector copies of all definitions of register REGNO
 1738 and replace its uses in the chain. */
1739
1740 void
1741 dimode_scalar_chain::make_vector_copies (unsigned regno)
1742 {
1743 rtx reg = regno_reg_rtx[regno];
1744 rtx vreg = gen_reg_rtx (DImode);
1745 bool count_reg = false;
1746 df_ref ref;
1747
1748 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1749 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1750 {
1751 df_ref use;
1752
1753 /* Detect the count register of a shift instruction. */
1754 for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
1755 if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
1756 {
1757 rtx_insn *insn = DF_REF_INSN (use);
1758 rtx def_set = single_set (insn);
1759
1760 gcc_assert (def_set);
1761
1762 rtx src = SET_SRC (def_set);
1763
1764 if ((GET_CODE (src) == ASHIFT
1765 || GET_CODE (src) == ASHIFTRT
1766 || GET_CODE (src) == LSHIFTRT)
1767 && !CONST_INT_P (XEXP (src, 1))
1768 && reg_or_subregno (XEXP (src, 1)) == regno)
1769 count_reg = true;
1770 }
1771
1772 start_sequence ();
1773 if (count_reg)
1774 {
1775 rtx qreg = gen_lowpart (QImode, reg);
1776 rtx tmp = gen_reg_rtx (SImode);
1777
1778 if (TARGET_ZERO_EXTEND_WITH_AND
1779 && optimize_function_for_speed_p (cfun))
1780 {
1781 emit_move_insn (tmp, const0_rtx);
1782 emit_insn (gen_movstrictqi
1783 (gen_lowpart (QImode, tmp), qreg));
1784 }
1785 else
1786 emit_insn (gen_rtx_SET
1787 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
1788
1789 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1790 {
1791 rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
1792 emit_move_insn (slot, tmp);
1793 tmp = copy_rtx (slot);
1794 }
1795
1796 emit_insn (gen_zero_extendsidi2 (vreg, tmp));
1797 }
1798 else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1799 {
1800 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1801 emit_move_insn (adjust_address (tmp, SImode, 0),
1802 gen_rtx_SUBREG (SImode, reg, 0));
1803 emit_move_insn (adjust_address (tmp, SImode, 4),
1804 gen_rtx_SUBREG (SImode, reg, 4));
1805 emit_move_insn (vreg, tmp);
1806 }
1807 else if (TARGET_SSE4_1)
1808 {
1809 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1810 CONST0_RTX (V4SImode),
1811 gen_rtx_SUBREG (SImode, reg, 0)));
1812 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
1813 gen_rtx_SUBREG (V4SImode, vreg, 0),
1814 gen_rtx_SUBREG (SImode, reg, 4),
1815 GEN_INT (2)));
1816 }
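/* Otherwise load both halves separately and combine them with
   PUNPCKLDQ.  */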
1817 else
1818 {
1819 rtx tmp = gen_reg_rtx (DImode);
1820 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1821 CONST0_RTX (V4SImode),
1822 gen_rtx_SUBREG (SImode, reg, 0)));
1823 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
1824 CONST0_RTX (V4SImode),
1825 gen_rtx_SUBREG (SImode, reg, 4)));
1826 emit_insn (gen_vec_interleave_lowv4si
1827 (gen_rtx_SUBREG (V4SImode, vreg, 0),
1828 gen_rtx_SUBREG (V4SImode, vreg, 0),
1829 gen_rtx_SUBREG (V4SImode, tmp, 0)));
1830 }
1831 rtx_insn *seq = get_insns ();
1832 end_sequence ();
1833 rtx_insn *insn = DF_REF_INSN (ref);
1834 emit_conversion_insns (seq, insn);
1835
1836 if (dump_file)
1837 fprintf (dump_file,
1838 " Copied r%d to a vector register r%d for insn %d\n",
1839 regno, REGNO (vreg), INSN_UID (insn));
1840 }
1841
1842 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1843 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1844 {
1845 rtx_insn *insn = DF_REF_INSN (ref);
1846 if (count_reg)
1847 {
1848 rtx def_set = single_set (insn);
1849 gcc_assert (def_set);
1850
1851 rtx src = SET_SRC (def_set);
1852
1853 if ((GET_CODE (src) == ASHIFT
1854 || GET_CODE (src) == ASHIFTRT
1855 || GET_CODE (src) == LSHIFTRT)
1856 && !CONST_INT_P (XEXP (src, 1))
1857 && reg_or_subregno (XEXP (src, 1)) == regno)
1858 XEXP (src, 1) = vreg;
1859 }
1860 else
1861 replace_with_subreg_in_insn (insn, reg, vreg);
1862
1863 if (dump_file)
1864 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
1865 regno, REGNO (vreg), INSN_UID (insn));
1866 }
1867 }
1868
1869 /* Convert all definitions of register REGNO
1870 and fix its uses. Scalar copies may be created
1871 in case the register is used in a non-convertible insn. */
1872
1873 void
1874 dimode_scalar_chain::convert_reg (unsigned regno)
1875 {
1876 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
1877 rtx reg = regno_reg_rtx[regno];
1878 rtx scopy = NULL_RTX;
1879 df_ref ref;
1880 bitmap conv;
1881
1882 conv = BITMAP_ALLOC (NULL);
1883 bitmap_copy (conv, insns);
1884
1885 if (scalar_copy)
1886 scopy = gen_reg_rtx (DImode);
1887
1888 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1889 {
1890 rtx_insn *insn = DF_REF_INSN (ref);
1891 rtx def_set = single_set (insn);
1892 rtx src = SET_SRC (def_set);
1893 rtx reg = DF_REF_REG (ref);
1894
1895 if (!MEM_P (src))
1896 {
1897 replace_with_subreg_in_insn (insn, reg, reg);
1898 bitmap_clear_bit (conv, INSN_UID (insn));
1899 }
1900
1901 if (scalar_copy)
1902 {
1903 start_sequence ();
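/* Copy the value back to a scalar register pair: through a stack
   slot when direct moves from vector registers are undesirable,
   with PEXTRD on SSE4.1, otherwise via MOVD and a vector shift.  */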
1904 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
1905 {
1906 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1907 emit_move_insn (tmp, reg);
1908 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1909 adjust_address (tmp, SImode, 0));
1910 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1911 adjust_address (tmp, SImode, 4));
1912 }
1913 else if (TARGET_SSE4_1)
1914 {
1915 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
1916 emit_insn
1917 (gen_rtx_SET
1918 (gen_rtx_SUBREG (SImode, scopy, 0),
1919 gen_rtx_VEC_SELECT (SImode,
1920 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1921
1922 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
1923 emit_insn
1924 (gen_rtx_SET
1925 (gen_rtx_SUBREG (SImode, scopy, 4),
1926 gen_rtx_VEC_SELECT (SImode,
1927 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1928 }
1929 else
1930 {
1931 rtx vcopy = gen_reg_rtx (V2DImode);
1932 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
1933 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1934 gen_rtx_SUBREG (SImode, vcopy, 0));
1935 emit_move_insn (vcopy,
1936 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
1937 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1938 gen_rtx_SUBREG (SImode, vcopy, 0));
1939 }
1940 rtx_insn *seq = get_insns ();
1941 end_sequence ();
1942 emit_conversion_insns (seq, insn);
1943
1944 if (dump_file)
1945 fprintf (dump_file,
1946 " Copied r%d to a scalar register r%d for insn %d\n",
1947 regno, REGNO (scopy), INSN_UID (insn));
1948 }
1949 }
1950
1951 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1952 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1953 {
1954 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
1955 {
1956 rtx_insn *insn = DF_REF_INSN (ref);
1957
1958 rtx def_set = single_set (insn);
1959 gcc_assert (def_set);
1960
1961 rtx src = SET_SRC (def_set);
1962 rtx dst = SET_DEST (def_set);
1963
1964 if ((GET_CODE (src) == ASHIFT
1965 || GET_CODE (src) == ASHIFTRT
1966 || GET_CODE (src) == LSHIFTRT)
1967 && !CONST_INT_P (XEXP (src, 1))
1968 && reg_or_subregno (XEXP (src, 1)) == regno)
1969 {
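/* The shift count now lives in a vector register; zero-extend its
   low byte to 64 bits (PMOVZXBQ with SSE4.1, otherwise an AND with
   a {0xff, 0} constant) before using it as the V2DImode count.  */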
1970 rtx tmp2 = gen_reg_rtx (V2DImode);
1971
1972 start_sequence ();
1973
1974 if (TARGET_SSE4_1)
1975 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
1976 (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
1977 else
1978 {
1979 rtx vec_cst
1980 = gen_rtx_CONST_VECTOR (V2DImode,
1981 gen_rtvec (2, GEN_INT (0xff),
1982 const0_rtx));
1983 vec_cst
1984 = validize_mem (force_const_mem (V2DImode, vec_cst));
1985
1986 emit_insn (gen_rtx_SET
1987 (tmp2,
1988 gen_rtx_AND (V2DImode,
1989 gen_rtx_SUBREG (V2DImode, reg, 0),
1990 vec_cst)));
1991 }
1992 rtx_insn *seq = get_insns ();
1993 end_sequence ();
1994
1995 emit_insn_before (seq, insn);
1996
1997 XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
1998 }
1999 else if (!MEM_P (dst) || !REG_P (src))
2000 replace_with_subreg_in_insn (insn, reg, reg);
2001
2002 bitmap_clear_bit (conv, INSN_UID (insn));
2003 }
2004 }
2005 /* Skip debug insns and uninitialized uses. */
2006 else if (DF_REF_CHAIN (ref)
2007 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
2008 {
2009 gcc_assert (scopy);
2010 replace_rtx (DF_REF_INSN (ref), reg, scopy);
2011 df_insn_rescan (DF_REF_INSN (ref));
2012 }
2013
2014 BITMAP_FREE (conv);
2015 }
2016
2017 /* Convert operand OP in INSN. We should handle
2018 memory operands and uninitialized registers.
2019 All other register uses are converted during
2020 register conversion. */
2021
2022 void
2023 dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
2024 {
2025 *op = copy_rtx_if_shared (*op);
2026
2027 if (GET_CODE (*op) == NOT)
2028 {
2029 convert_op (&XEXP (*op, 0), insn);
2030 PUT_MODE (*op, V2DImode);
2031 }
2032 else if (MEM_P (*op))
2033 {
2034 rtx tmp = gen_reg_rtx (DImode);
2035
2036 emit_insn_before (gen_move_insn (tmp, *op), insn);
2037 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
2038
2039 if (dump_file)
2040 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
2041 INSN_UID (insn), REGNO (tmp));
2042 }
2043 else if (REG_P (*op))
2044 {
2045 /* We may not have converted this register's use in case the
2046 register has no definition. Otherwise it should already
2047 have been converted in convert_reg. */
2048 df_ref ref;
2049 FOR_EACH_INSN_USE (ref, insn)
2050 if (DF_REF_REGNO (ref) == REGNO (*op))
2051 {
2052 gcc_assert (!DF_REF_CHAIN (ref));
2053 break;
2054 }
2055 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
2056 }
2057 else if (CONST_INT_P (*op))
2058 {
2059 rtx vec_cst;
2060 rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
2061
2062 /* Prefer all ones vector in case of -1. */
2063 if (constm1_operand (*op, GET_MODE (*op)))
2064 vec_cst = CONSTM1_RTX (V2DImode);
2065 else
2066 vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
2067 gen_rtvec (2, *op, const0_rtx));
2068
2069 if (!standard_sse_constant_p (vec_cst, V2DImode))
2070 {
2071 start_sequence ();
2072 vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
2073 rtx_insn *seq = get_insns ();
2074 end_sequence ();
2075 emit_insn_before (seq, insn);
2076 }
2077
2078 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
2079 *op = tmp;
2080 }
2081 else
2082 {
2083 gcc_assert (SUBREG_P (*op));
2084 gcc_assert (GET_MODE (*op) == V2DImode);
2085 }
2086 }
2087
2088 /* Convert INSN to vector mode. */
2089
2090 void
2091 dimode_scalar_chain::convert_insn (rtx_insn *insn)
2092 {
2093 rtx def_set = single_set (insn);
2094 rtx src = SET_SRC (def_set);
2095 rtx dst = SET_DEST (def_set);
2096 rtx subreg;
2097
2098 if (MEM_P (dst) && !REG_P (src))
2099 {
2100 /* The converted vector instruction cannot store its result directly
2101 to memory, so a temporary register is required. */
2102 rtx tmp = gen_reg_rtx (DImode);
2103 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
2104 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
2105 }
2106
2107 switch (GET_CODE (src))
2108 {
2109 case ASHIFT:
2110 case ASHIFTRT:
2111 case LSHIFTRT:
2112 convert_op (&XEXP (src, 0), insn);
2113 PUT_MODE (src, V2DImode);
2114 break;
2115
2116 case PLUS:
2117 case MINUS:
2118 case IOR:
2119 case XOR:
2120 case AND:
2121 convert_op (&XEXP (src, 0), insn);
2122 convert_op (&XEXP (src, 1), insn);
2123 PUT_MODE (src, V2DImode);
2124 break;
2125
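/* Vector negation is expressed as a subtraction from zero.  */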
2126 case NEG:
2127 src = XEXP (src, 0);
2128 convert_op (&src, insn);
2129 subreg = gen_reg_rtx (V2DImode);
2130 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
2131 src = gen_rtx_MINUS (V2DImode, subreg, src);
2132 break;
2133
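/* Vector NOT is expressed as an XOR with an all-ones constant.  */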
2134 case NOT:
2135 src = XEXP (src, 0);
2136 convert_op (&src, insn);
2137 subreg = gen_reg_rtx (V2DImode);
2138 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
2139 src = gen_rtx_XOR (V2DImode, src, subreg);
2140 break;
2141
2142 case MEM:
2143 if (!REG_P (dst))
2144 convert_op (&src, insn);
2145 break;
2146
2147 case REG:
2148 if (!MEM_P (dst))
2149 convert_op (&src, insn);
2150 break;
2151
2152 case SUBREG:
2153 gcc_assert (GET_MODE (src) == V2DImode);
2154 break;
2155
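/* Comparison of a DImode value against zero: duplicate the value
   into both vector lanes and test it with PTEST.  */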
2156 case COMPARE:
2157 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
2158
2159 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
2160 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
2161
2162 if (REG_P (src))
2163 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
2164 else
2165 subreg = copy_rtx_if_shared (src);
2166 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
2167 copy_rtx_if_shared (subreg),
2168 copy_rtx_if_shared (subreg)),
2169 insn);
2170 dst = gen_rtx_REG (CCmode, FLAGS_REG);
2171 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
2172 copy_rtx_if_shared (src)),
2173 UNSPEC_PTEST);
2174 break;
2175
2176 case CONST_INT:
2177 convert_op (&src, insn);
2178 break;
2179
2180 default:
2181 gcc_unreachable ();
2182 }
2183
2184 SET_SRC (def_set) = src;
2185 SET_DEST (def_set) = dst;
2186
2187 /* Drop possible dead definitions. */
2188 PATTERN (insn) = def_set;
2189
2190 INSN_CODE (insn) = -1;
2191 recog_memoized (insn);
2192 df_insn_rescan (insn);
2193 }
2194
2195 /* Fix uses of converted REG in debug insns. */
2196
2197 void
2198 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2199 {
2200 if (!flag_var_tracking)
2201 return;
2202
2203 df_ref ref, next;
2204 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2205 {
2206 rtx_insn *insn = DF_REF_INSN (ref);
2207 /* Make sure the next ref is for a different instruction,
2208 so that we're not affected by the rescan. */
2209 next = DF_REF_NEXT_REG (ref);
2210 while (next && DF_REF_INSN (next) == insn)
2211 next = DF_REF_NEXT_REG (next);
2212
2213 if (DEBUG_INSN_P (insn))
2214 {
2215 /* It may be a debug insn with a TImode variable in
2216 register. */
2217 bool changed = false;
2218 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2219 {
2220 rtx *loc = DF_REF_LOC (ref);
2221 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2222 {
2223 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2224 changed = true;
2225 }
2226 }
2227 if (changed)
2228 df_insn_rescan (insn);
2229 }
2230 }
2231 }
2232
2233 /* Convert INSN from TImode to V1TImode. */
2234
2235 void
2236 timode_scalar_chain::convert_insn (rtx_insn *insn)
2237 {
2238 rtx def_set = single_set (insn);
2239 rtx src = SET_SRC (def_set);
2240 rtx dst = SET_DEST (def_set);
2241
2242 switch (GET_CODE (dst))
2243 {
2244 case REG:
2245 {
2246 rtx tmp = find_reg_equal_equiv_note (insn);
2247 if (tmp)
2248 PUT_MODE (XEXP (tmp, 0), V1TImode);
2249 PUT_MODE (dst, V1TImode);
2250 fix_debug_reg_uses (dst);
2251 }
2252 break;
2253 case MEM:
2254 PUT_MODE (dst, V1TImode);
2255 break;
2256
2257 default:
2258 gcc_unreachable ();
2259 }
2260
2261 switch (GET_CODE (src))
2262 {
2263 case REG:
2264 PUT_MODE (src, V1TImode);
2265 /* Call fix_debug_reg_uses only if SRC is never defined. */
2266 if (!DF_REG_DEF_CHAIN (REGNO (src)))
2267 fix_debug_reg_uses (src);
2268 break;
2269
2270 case MEM:
2271 PUT_MODE (src, V1TImode);
2272 break;
2273
2274 case CONST_WIDE_INT:
2275 if (NONDEBUG_INSN_P (insn))
2276 {
2277 /* Since there is no instruction to store a 128-bit constant,
2278 temporary register usage is required. */
2279 rtx tmp = gen_reg_rtx (V1TImode);
2280 start_sequence ();
2281 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2282 src = validize_mem (force_const_mem (V1TImode, src));
2283 rtx_insn *seq = get_insns ();
2284 end_sequence ();
2285 if (seq)
2286 emit_insn_before (seq, insn);
2287 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2288 dst = tmp;
2289 }
2290 break;
2291
2292 case CONST_INT:
2293 switch (standard_sse_constant_p (src, TImode))
2294 {
2295 case 1:
2296 src = CONST0_RTX (GET_MODE (dst));
2297 break;
2298 case 2:
2299 src = CONSTM1_RTX (GET_MODE (dst));
2300 break;
2301 default:
2302 gcc_unreachable ();
2303 }
2304 if (NONDEBUG_INSN_P (insn))
2305 {
2306 rtx tmp = gen_reg_rtx (V1TImode);
2307 /* Since there is no instruction to store a standard SSE
2308 constant, temporary register usage is required. */
2309 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2310 dst = tmp;
2311 }
2312 break;
2313
2314 default:
2315 gcc_unreachable ();
2316 }
2317
2318 SET_SRC (def_set) = src;
2319 SET_DEST (def_set) = dst;
2320
2321 /* Drop possible dead definitions. */
2322 PATTERN (insn) = def_set;
2323
2324 INSN_CODE (insn) = -1;
2325 recog_memoized (insn);
2326 df_insn_rescan (insn);
2327 }
2328
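/* Convert registers in the chain: convert every register defined
   within the chain, then make vector copies for registers that need
   conversion but are defined outside of it.  */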
2329 void
2330 dimode_scalar_chain::convert_registers ()
2331 {
2332 bitmap_iterator bi;
2333 unsigned id;
2334
2335 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2336 convert_reg (id);
2337
2338 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2339 make_vector_copies (id);
2340 }
2341
2342 /* Convert the whole chain, creating the required register
2343 conversions and copies. */
2344
2345 int
2346 scalar_chain::convert ()
2347 {
2348 bitmap_iterator bi;
2349 unsigned id;
2350 int converted_insns = 0;
2351
2352 if (!dbg_cnt (stv_conversion))
2353 return 0;
2354
2355 if (dump_file)
2356 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2357
2358 convert_registers ();
2359
2360 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2361 {
2362 convert_insn (DF_INSN_UID_GET (id)->insn);
2363 converted_insns++;
2364 }
2365
2366 return converted_insns;
2367 }
2368
2369 /* Main STV pass function. Find and convert scalar
2370 instructions into vector mode when profitable. */
2371
2372 static unsigned int
2373 convert_scalars_to_vector ()
2374 {
2375 basic_block bb;
2376 bitmap candidates;
2377 int converted_insns = 0;
2378
2379 bitmap_obstack_initialize (NULL);
2380 candidates = BITMAP_ALLOC (NULL);
2381
2382 calculate_dominance_info (CDI_DOMINATORS);
2383 df_set_flags (DF_DEFER_INSN_RESCAN);
2384 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2385 df_md_add_problem ();
2386 df_analyze ();
2387
2388 /* Find all instructions we want to convert into vector mode. */
2389 if (dump_file)
2390 fprintf (dump_file, "Searching for mode conversion candidates...\n");
2391
2392 FOR_EACH_BB_FN (bb, cfun)
2393 {
2394 rtx_insn *insn;
2395 FOR_BB_INSNS (bb, insn)
2396 if (scalar_to_vector_candidate_p (insn))
2397 {
2398 if (dump_file)
2399 fprintf (dump_file, " insn %d is marked as a candidate\n",
2400 INSN_UID (insn));
2401
2402 bitmap_set_bit (candidates, INSN_UID (insn));
2403 }
2404 }
2405
2406 remove_non_convertible_regs (candidates);
2407
2408 if (bitmap_empty_p (candidates))
2409 if (dump_file)
2410 fprintf (dump_file, "There are no candidates for optimization.\n");
2411
2412 while (!bitmap_empty_p (candidates))
2413 {
2414 unsigned uid = bitmap_first_set_bit (candidates);
2415 scalar_chain *chain;
2416
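/* On 64-bit targets the STV pass converts TImode chains; on 32-bit
   targets it converts DImode chains.  */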
2417 if (TARGET_64BIT)
2418 chain = new timode_scalar_chain;
2419 else
2420 chain = new dimode_scalar_chain;
2421
2422 /* Find the instruction chain we want to convert to vector mode.
2423 Check all uses and definitions to estimate all required
2424 conversions. */
2425 chain->build (candidates, uid);
2426
2427 if (chain->compute_convert_gain () > 0)
2428 converted_insns += chain->convert ();
2429 else
2430 if (dump_file)
2431 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
2432 chain->chain_id);
2433
2434 delete chain;
2435 }
2436
2437 if (dump_file)
2438 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2439
2440 BITMAP_FREE (candidates);
2441 bitmap_obstack_release (NULL);
2442 df_process_deferred_rescans ();
2443
2444 /* Conversion means we may have 128-bit register spills/fills,
2445 which require an aligned stack. */
2446 if (converted_insns)
2447 {
2448 if (crtl->stack_alignment_needed < 128)
2449 crtl->stack_alignment_needed = 128;
2450 if (crtl->stack_alignment_estimated < 128)
2451 crtl->stack_alignment_estimated = 128;
2452 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
2453 if (TARGET_64BIT)
2454 for (tree parm = DECL_ARGUMENTS (current_function_decl);
2455 parm; parm = DECL_CHAIN (parm))
2456 {
2457 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2458 continue;
2459 if (DECL_RTL_SET_P (parm)
2460 && GET_MODE (DECL_RTL (parm)) == V1TImode)
2461 {
2462 rtx r = DECL_RTL (parm);
2463 if (REG_P (r))
2464 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2465 }
2466 if (DECL_INCOMING_RTL (parm)
2467 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2468 {
2469 rtx r = DECL_INCOMING_RTL (parm);
2470 if (REG_P (r))
2471 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
2472 }
2473 }
2474 }
2475
2476 return 0;
2477 }
2478
2479 namespace {
2480
2481 const pass_data pass_data_insert_vzeroupper =
2482 {
2483 RTL_PASS, /* type */
2484 "vzeroupper", /* name */
2485 OPTGROUP_NONE, /* optinfo_flags */
2486 TV_MACH_DEP, /* tv_id */
2487 0, /* properties_required */
2488 0, /* properties_provided */
2489 0, /* properties_destroyed */
2490 0, /* todo_flags_start */
2491 TODO_df_finish, /* todo_flags_finish */
2492 };
2493
2494 class pass_insert_vzeroupper : public rtl_opt_pass
2495 {
2496 public:
2497 pass_insert_vzeroupper(gcc::context *ctxt)
2498 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2499 {}
2500
2501 /* opt_pass methods: */
2502 virtual bool gate (function *)
2503 {
2504 return TARGET_AVX
2505 && TARGET_VZEROUPPER && flag_expensive_optimizations
2506 && !optimize_size;
2507 }
2508
2509 virtual unsigned int execute (function *)
2510 {
2511 return rest_of_handle_insert_vzeroupper ();
2512 }
2513
2514 }; // class pass_insert_vzeroupper
2515
2516 const pass_data pass_data_stv =
2517 {
2518 RTL_PASS, /* type */
2519 "stv", /* name */
2520 OPTGROUP_NONE, /* optinfo_flags */
2521 TV_MACH_DEP, /* tv_id */
2522 0, /* properties_required */
2523 0, /* properties_provided */
2524 0, /* properties_destroyed */
2525 0, /* todo_flags_start */
2526 TODO_df_finish, /* todo_flags_finish */
2527 };
2528
2529 class pass_stv : public rtl_opt_pass
2530 {
2531 public:
2532 pass_stv (gcc::context *ctxt)
2533 : rtl_opt_pass (pass_data_stv, ctxt),
2534 timode_p (false)
2535 {}
2536
2537 /* opt_pass methods: */
2538 virtual bool gate (function *)
2539 {
2540 return (timode_p == !!TARGET_64BIT
2541 && TARGET_STV && TARGET_SSE2 && optimize > 1);
2542 }
2543
2544 virtual unsigned int execute (function *)
2545 {
2546 return convert_scalars_to_vector ();
2547 }
2548
2549 opt_pass *clone ()
2550 {
2551 return new pass_stv (m_ctxt);
2552 }
2553
2554 void set_pass_param (unsigned int n, bool param)
2555 {
2556 gcc_assert (n == 0);
2557 timode_p = param;
2558 }
2559
2560 private:
2561 bool timode_p;
2562 }; // class pass_stv
2563
2564 } // anon namespace
2565
2566 rtl_opt_pass *
2567 make_pass_insert_vzeroupper (gcc::context *ctxt)
2568 {
2569 return new pass_insert_vzeroupper (ctxt);
2570 }
2571
2572 rtl_opt_pass *
2573 make_pass_stv (gcc::context *ctxt)
2574 {
2575 return new pass_stv (ctxt);
2576 }
2577
2578 /* Inserting ENDBRANCH instructions. */
2579
2580 static unsigned int
2581 rest_of_insert_endbranch (void)
2582 {
2583 timevar_push (TV_MACH_DEP);
2584
2585 rtx cet_eb;
2586 rtx_insn *insn;
2587 basic_block bb;
2588
2589 /* Currently emit an ENDBR if the function is tracked, i.e. the
2590 'nocf_check' attribute is absent. Later an optimization will be
2591 introduced to analyze whether the address of a static function is
2592 taken. A static function whose address is not taken will get a
2593 nocf_check attribute, which will reduce the number of ENDBRs emitted. */
2594
2595 if (!lookup_attribute ("nocf_check",
2596 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
2597 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
2598 {
2599 cet_eb = gen_nop_endbr ();
2600
2601 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2602 insn = BB_HEAD (bb);
2603 emit_insn_before (cet_eb, insn);
2604 }
2605
2606 bb = 0;
2607 FOR_EACH_BB_FN (bb, cfun)
2608 {
2609 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2610 insn = NEXT_INSN (insn))
2611 {
2612 if (INSN_P (insn) && GET_CODE (insn) == CALL_INSN)
2613 {
2614 if (find_reg_note (insn, REG_SETJMP, NULL) == NULL)
2615 continue;
2616 /* Generate an ENDBRANCH after a CALL that can return more than
2617 once (setjmp-like functions). */
2618
2619 /* Skip notes and debug insns that must be next to the
2620 call insn. ??? This might skip a lot more than
2621 that... ??? Skipping barriers and emitting code
2622 after them surely looks like a mistake; we probably
2623 won't ever hit it, for we'll hit BB_END first. */
2624 rtx_insn *next_insn = insn;
2625 while ((next_insn != BB_END (bb))
2626 && (DEBUG_INSN_P (NEXT_INSN (next_insn))
2627 || NOTE_P (NEXT_INSN (next_insn))
2628 || BARRIER_P (NEXT_INSN (next_insn))))
2629 next_insn = NEXT_INSN (next_insn);
2630
2631 cet_eb = gen_nop_endbr ();
2632 emit_insn_after_setloc (cet_eb, next_insn, INSN_LOCATION (insn));
2633 continue;
2634 }
2635
2636 if (INSN_P (insn) && JUMP_P (insn) && flag_cet_switch)
2637 {
2638 rtx target = JUMP_LABEL (insn);
2639 if (target == NULL_RTX || ANY_RETURN_P (target))
2640 continue;
2641
2642 /* Check that the jump is via a switch table. */
2643 rtx_insn *label = as_a<rtx_insn *> (target);
2644 rtx_insn *table = next_insn (label);
2645 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2646 continue;
2647
2648 /* For the indirect jump, find all places it can jump to and insert
2649 an ENDBRANCH there. This is done under a special flag that
2650 controls ENDBRANCH generation for switch statements. */
2651 edge_iterator ei;
2652 edge e;
2653 basic_block dest_blk;
2654
2655 FOR_EACH_EDGE (e, ei, bb->succs)
2656 {
2657 rtx_insn *insn;
2658
2659 dest_blk = e->dest;
2660 insn = BB_HEAD (dest_blk);
2661 gcc_assert (LABEL_P (insn));
2662 cet_eb = gen_nop_endbr ();
2663 emit_insn_after (cet_eb, insn);
2664 }
2665 continue;
2666 }
2667
2668 if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2669 || (NOTE_P (insn)
2670 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
2671 /* TODO. Check /s bit also. */
2672 {
2673 cet_eb = gen_nop_endbr ();
2674 emit_insn_after (cet_eb, insn);
2675 continue;
2676 }
2677 }
2678 }
2679
2680 timevar_pop (TV_MACH_DEP);
2681 return 0;
2682 }
2683
2684 namespace {
2685
2686 const pass_data pass_data_insert_endbranch =
2687 {
2688 RTL_PASS, /* type. */
2689 "cet", /* name. */
2690 OPTGROUP_NONE, /* optinfo_flags. */
2691 TV_MACH_DEP, /* tv_id. */
2692 0, /* properties_required. */
2693 0, /* properties_provided. */
2694 0, /* properties_destroyed. */
2695 0, /* todo_flags_start. */
2696 0, /* todo_flags_finish. */
2697 };
2698
2699 class pass_insert_endbranch : public rtl_opt_pass
2700 {
2701 public:
2702 pass_insert_endbranch (gcc::context *ctxt)
2703 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2704 {}
2705
2706 /* opt_pass methods: */
2707 virtual bool gate (function *)
2708 {
2709 return ((flag_cf_protection & CF_BRANCH) && TARGET_IBT);
2710 }
2711
2712 virtual unsigned int execute (function *)
2713 {
2714 return rest_of_insert_endbranch ();
2715 }
2716
2717 }; // class pass_insert_endbranch
2718
2719 } // anon namespace
2720
2721 rtl_opt_pass *
2722 make_pass_insert_endbranch (gcc::context *ctxt)
2723 {
2724 return new pass_insert_endbranch (ctxt);
2725 }
2726
2727 /* Return true if a red-zone is in use. */
2728
2729 bool
2730 ix86_using_red_zone (void)
2731 {
2732 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2733 }
2734 \f
2735 /* Return a string that documents the current -m options. The caller is
2736 responsible for freeing the string. */
2737
2738 static char *
2739 ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2740 int flags, int flags2,
2741 const char *arch, const char *tune,
2742 enum fpmath_unit fpmath, bool add_nl_p)
2743 {
2744 struct ix86_target_opts
2745 {
2746 const char *option; /* option string */
2747 HOST_WIDE_INT mask; /* isa mask options */
2748 };
2749
2750 /* This table is ordered so that options like -msse4.2 that imply other
2751 ISAs come first. Target string will be displayed in the same order. */
2752 static struct ix86_target_opts isa2_opts[] =
2753 {
2754 { "-mcx16", OPTION_MASK_ISA_CX16 },
2755 { "-mmpx", OPTION_MASK_ISA_MPX },
2756 { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 },
2757 { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI },
2758 { "-mvaes", OPTION_MASK_ISA_VAES },
2759 { "-mrdpid", OPTION_MASK_ISA_RDPID },
2760 { "-msgx", OPTION_MASK_ISA_SGX },
2761 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2762 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2763 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
2764 { "-mibt", OPTION_MASK_ISA_IBT },
2765 { "-mshstk", OPTION_MASK_ISA_SHSTK }
2766 };
2767 static struct ix86_target_opts isa_opts[] =
2768 {
2769 { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ },
2770 { "-mgfni", OPTION_MASK_ISA_GFNI },
2771 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2772 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2773 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2774 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2775 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2776 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2777 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2778 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2779 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2780 { "-mavx2", OPTION_MASK_ISA_AVX2 },
2781 { "-mfma", OPTION_MASK_ISA_FMA },
2782 { "-mxop", OPTION_MASK_ISA_XOP },
2783 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2784 { "-mf16c", OPTION_MASK_ISA_F16C },
2785 { "-mavx", OPTION_MASK_ISA_AVX },
2786 /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
2787 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2788 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2789 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2790 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2791 { "-msse3", OPTION_MASK_ISA_SSE3 },
2792 { "-maes", OPTION_MASK_ISA_AES },
2793 { "-msha", OPTION_MASK_ISA_SHA },
2794 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2795 { "-msse2", OPTION_MASK_ISA_SSE2 },
2796 { "-msse", OPTION_MASK_ISA_SSE },
2797 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2798 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2799 { "-mmmx", OPTION_MASK_ISA_MMX },
2800 { "-mrtm", OPTION_MASK_ISA_RTM },
2801 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2802 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2803 { "-madx", OPTION_MASK_ISA_ADX },
2804 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2805 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2806 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2807 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2808 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2809 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2810 { "-mabm", OPTION_MASK_ISA_ABM },
2811 { "-mbmi", OPTION_MASK_ISA_BMI },
2812 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2813 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2814 { "-mtbm", OPTION_MASK_ISA_TBM },
2815 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2816 { "-msahf", OPTION_MASK_ISA_SAHF },
2817 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2818 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2819 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2820 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2821 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2822 { "-mclzero", OPTION_MASK_ISA_CLZERO },
2823 { "-mpku", OPTION_MASK_ISA_PKU },
2824 { "-mlwp", OPTION_MASK_ISA_LWP },
2825 { "-mhle", OPTION_MASK_ISA_HLE },
2826 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2827 { "-mclwb", OPTION_MASK_ISA_CLWB }
2828 };
2829
2830 /* Flag options. */
2831 static struct ix86_target_opts flag_opts[] =
2832 {
2833 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2834 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2835 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2836 { "-m80387", MASK_80387 },
2837 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2838 { "-malign-double", MASK_ALIGN_DOUBLE },
2839 { "-mcld", MASK_CLD },
2840 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2841 { "-mieee-fp", MASK_IEEE_FP },
2842 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2843 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2844 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2845 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2846 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2847 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2848 { "-mno-red-zone", MASK_NO_RED_ZONE },
2849 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2850 { "-mrecip", MASK_RECIP },
2851 { "-mrtd", MASK_RTD },
2852 { "-msseregparm", MASK_SSEREGPARM },
2853 { "-mstack-arg-probe", MASK_STACK_PROBE },
2854 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2855 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2856 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2857 { "-mvzeroupper", MASK_VZEROUPPER },
2858 { "-mstv", MASK_STV },
2859 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2860 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
2861 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
2862 };
2863
2864 /* Additional flag options. */
2865 static struct ix86_target_opts flag2_opts[] =
2866 {
2867 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY }
2868 };
2869
2870 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2871 + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2872
2873 char isa_other[40];
2874 char isa2_other[40];
2875 char flags_other[40];
2876 char flags2_other[40];
2877 unsigned num = 0;
2878 unsigned i, j;
2879 char *ret;
2880 char *ptr;
2881 size_t len;
2882 size_t line_len;
2883 size_t sep_len;
2884 const char *abi;
2885
2886 memset (opts, '\0', sizeof (opts));
2887
2888 /* Add -march= option. */
2889 if (arch)
2890 {
2891 opts[num][0] = "-march=";
2892 opts[num++][1] = arch;
2893 }
2894
2895 /* Add -mtune= option. */
2896 if (tune)
2897 {
2898 opts[num][0] = "-mtune=";
2899 opts[num++][1] = tune;
2900 }
2901
2902 /* Add -m32/-m64/-mx32. */
2903 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2904 {
2905 if ((isa & OPTION_MASK_ABI_64) != 0)
2906 abi = "-m64";
2907 else
2908 abi = "-mx32";
2909 isa &= ~ (OPTION_MASK_ISA_64BIT
2910 | OPTION_MASK_ABI_64
2911 | OPTION_MASK_ABI_X32);
2912 }
2913 else
2914 abi = "-m32";
2915 opts[num++][0] = abi;
2916
2917 /* Pick out the options in isa2 options. */
2918 for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
2919 {
2920 if ((isa2 & isa2_opts[i].mask) != 0)
2921 {
2922 opts[num++][0] = isa2_opts[i].option;
2923 isa2 &= ~ isa2_opts[i].mask;
2924 }
2925 }
2926
2927 if (isa2 && add_nl_p)
2928 {
2929 opts[num++][0] = isa2_other;
2930 sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
2931 }
2932
2933 /* Pick out the options in isa options. */
2934 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2935 {
2936 if ((isa & isa_opts[i].mask) != 0)
2937 {
2938 opts[num++][0] = isa_opts[i].option;
2939 isa &= ~ isa_opts[i].mask;
2940 }
2941 }
2942
2943 if (isa && add_nl_p)
2944 {
2945 opts[num++][0] = isa_other;
2946 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
2947 }
2948
2949 /* Add flag options. */
2950 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2951 {
2952 if ((flags & flag_opts[i].mask) != 0)
2953 {
2954 opts[num++][0] = flag_opts[i].option;
2955 flags &= ~ flag_opts[i].mask;
2956 }
2957 }
2958
2959 if (flags && add_nl_p)
2960 {
2961 opts[num++][0] = flags_other;
2962 sprintf (flags_other, "(other flags: %#x)", flags);
2963 }
2964
2965 /* Add additional flag options. */
2966 for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
2967 {
2968 if ((flags2 & flag2_opts[i].mask) != 0)
2969 {
2970 opts[num++][0] = flag2_opts[i].option;
2971 flags2 &= ~ flag2_opts[i].mask;
2972 }
2973 }
2974
2975 if (flags2 && add_nl_p)
2976 {
2977 opts[num++][0] = flags2_other;
2978 sprintf (flags2_other, "(other flags2: %#x)", flags2);
2979 }
2980
2981 /* Add -fpmath= option. */
2982 if (fpmath)
2983 {
2984 opts[num][0] = "-mfpmath=";
2985 switch ((int) fpmath)
2986 {
2987 case FPMATH_387:
2988 opts[num++][1] = "387";
2989 break;
2990
2991 case FPMATH_SSE:
2992 opts[num++][1] = "sse";
2993 break;
2994
2995 case FPMATH_387 | FPMATH_SSE:
2996 opts[num++][1] = "sse+387";
2997 break;
2998
2999 default:
3000 gcc_unreachable ();
3001 }
3002 }
3003
3004 /* Any options? */
3005 if (num == 0)
3006 return NULL;
3007
3008 gcc_assert (num < ARRAY_SIZE (opts));
3009
3010 /* Size the string. */
3011 len = 0;
3012 sep_len = (add_nl_p) ? 3 : 1;
3013 for (i = 0; i < num; i++)
3014 {
3015 len += sep_len;
3016 for (j = 0; j < 2; j++)
3017 if (opts[i][j])
3018 len += strlen (opts[i][j]);
3019 }
3020
3021 /* Build the string. */
3022 ret = ptr = (char *) xmalloc (len);
3023 line_len = 0;
3024
3025 for (i = 0; i < num; i++)
3026 {
3027 size_t len2[2];
3028
3029 for (j = 0; j < 2; j++)
3030 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3031
3032 if (i != 0)
3033 {
3034 *ptr++ = ' ';
3035 line_len++;
3036
3037 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3038 {
3039 *ptr++ = '\\';
3040 *ptr++ = '\n';
3041 line_len = 0;
3042 }
3043 }
3044
3045 for (j = 0; j < 2; j++)
3046 if (opts[i][j])
3047 {
3048 memcpy (ptr, opts[i][j], len2[j]);
3049 ptr += len2[j];
3050 line_len += len2[j];
3051 }
3052 }
3053
3054 *ptr = '\0';
3055 gcc_assert (ret + len >= ptr);
3056
3057 return ret;
3058 }
3059
3060 /* Return true if profiling code should be emitted before the
3061 prologue, and false otherwise.
3062 Note: for x86 with "hotfix" a sorry () diagnostic is issued. */
3063 static bool
3064 ix86_profile_before_prologue (void)
3065 {
3066 return flag_fentry != 0;
3067 }
3068
3069 /* Function that is callable from the debugger to print the current
3070 options. */
3071 void ATTRIBUTE_UNUSED
3072 ix86_debug_options (void)
3073 {
3074 char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
3075 target_flags, ix86_target_flags,
3076 ix86_arch_string,ix86_tune_string,
3077 ix86_fpmath, true);
3078
3079 if (opts)
3080 {
3081 fprintf (stderr, "%s\n\n", opts);
3082 free (opts);
3083 }
3084 else
3085 fputs ("<no options>\n\n", stderr);
3086
3087 return;
3088 }
3089
3090 /* Return true if T is one of the bytes we should avoid with
3091 -fmitigate-rop. */
3092
3093 static bool
3094 ix86_rop_should_change_byte_p (int t)
3095 {
3096 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3097 }
3098
3099 static const char *stringop_alg_names[] = {
3100 #define DEF_ENUM
3101 #define DEF_ALG(alg, name) #name,
3102 #include "stringop.def"
3103 #undef DEF_ENUM
3104 #undef DEF_ALG
3105 };
3106
3107 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3108 The string is of the following form (or a comma-separated list of such entries):
3109
3110 strategy_alg:max_size:[align|noalign]
3111
3112 where the full size range for the strategy is either [0, max_size] or
3113 [min_size, max_size], in which min_size is the max_size + 1 of the
3114 preceding range. The last size range must have max_size == -1.
3115
3116 Examples:
3117
3118 1.
3119 -mmemcpy-strategy=libcall:-1:noalign
3120
3121 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
3122
3123
3124 2.
3125 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3126
3127 This is to tell the compiler to use the following strategy for memset
3128 1) when the expected size is between [1, 16], use rep_8byte strategy;
3129 2) when the size is between [17, 2048], use vector_loop;
3130 3) when the size is > 2048, use libcall. */
3131
3132 struct stringop_size_range
3133 {
3134 int max;
3135 stringop_alg alg;
3136 bool noalign;
3137 };
3138
3139 static void
3140 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3141 {
3142 const struct stringop_algs *default_algs;
3143 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3144 char *curr_range_str, *next_range_str;
3145 const char *opt = is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=";
3146 int i = 0, n = 0;
3147
3148 if (is_memset)
3149 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3150 else
3151 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3152
3153 curr_range_str = strategy_str;
3154
3155 do
3156 {
3157 int maxs;
3158 char alg_name[128];
3159 char align[16];
3160 next_range_str = strchr (curr_range_str, ',');
3161 if (next_range_str)
3162 *next_range_str++ = '\0';
3163
3164 if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs,
3165 align) != 3)
3166 {
3167 error ("wrong argument %qs to option %qs", curr_range_str, opt);
3168 return;
3169 }
3170
3171 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3172 {
3173 error ("size ranges of option %qs should be increasing", opt);
3174 return;
3175 }
3176
3177 for (i = 0; i < last_alg; i++)
3178 if (!strcmp (alg_name, stringop_alg_names[i]))
3179 break;
3180
3181 if (i == last_alg)
3182 {
3183 error ("wrong strategy name %qs specified for option %qs",
3184 alg_name, opt);
3185
3186 auto_vec <const char *> candidates;
3187 for (i = 0; i < last_alg; i++)
3188 if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3189 candidates.safe_push (stringop_alg_names[i]);
3190
3191 char *s;
3192 const char *hint
3193 = candidates_list_and_hint (alg_name, s, candidates);
3194 if (hint)
3195 inform (input_location,
3196 "valid arguments to %qs are: %s; did you mean %qs?",
3197 opt, s, hint);
3198 else
3199 inform (input_location, "valid arguments to %qs are: %s",
3200 opt, s);
3201 XDELETEVEC (s);
3202 return;
3203 }
3204
3205 if ((stringop_alg) i == rep_prefix_8_byte
3206 && !TARGET_64BIT)
3207 {
3208 /* rep; movq isn't available in 32-bit code. */
3209 error ("strategy name %qs specified for option %qs "
3210 "not supported for 32-bit code", alg_name, opt);
3211 return;
3212 }
3213
3214 input_ranges[n].max = maxs;
3215 input_ranges[n].alg = (stringop_alg) i;
3216 if (!strcmp (align, "align"))
3217 input_ranges[n].noalign = false;
3218 else if (!strcmp (align, "noalign"))
3219 input_ranges[n].noalign = true;
3220 else
3221 {
3222 error ("unknown alignment %qs specified for option %qs", align, opt);
3223 return;
3224 }
3225 n++;
3226 curr_range_str = next_range_str;
3227 }
3228 while (curr_range_str);
3229
3230 if (input_ranges[n - 1].max != -1)
3231 {
3232 error ("the max value for the last size range should be -1"
3233 " for option %qs", opt);
3234 return;
3235 }
3236
3237 if (n > MAX_STRINGOP_ALGS)
3238 {
3239 error ("too many size ranges specified in option %qs", opt);
3240 return;
3241 }
3242
3243 /* Now override the default algs array. */
3244 for (i = 0; i < n; i++)
3245 {
3246 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3247 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3248 = input_ranges[i].alg;
3249 *const_cast<int *>(&default_algs->size[i].noalign)
3250 = input_ranges[i].noalign;
3251 }
3252 }
3253
3254 \f
3255 /* Parse the -mtune-ctrl= option: a comma-separated list of feature names,
3256 each optionally prefixed with '^' to clear it. When DUMP is true, print the features that are explicitly set. */
3257
3258 static void
3259 parse_mtune_ctrl_str (bool dump)
3260 {
3261 if (!ix86_tune_ctrl_string)
3262 return;
3263
3264 char *next_feature_string = NULL;
3265 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3266 char *orig = curr_feature_string;
3267 int i;
3268 do
3269 {
3270 bool clear = false;
3271
3272 next_feature_string = strchr (curr_feature_string, ',');
3273 if (next_feature_string)
3274 *next_feature_string++ = '\0';
3275 if (*curr_feature_string == '^')
3276 {
3277 curr_feature_string++;
3278 clear = true;
3279 }
3280 for (i = 0; i < X86_TUNE_LAST; i++)
3281 {
3282 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3283 {
3284 ix86_tune_features[i] = !clear;
3285 if (dump)
3286 fprintf (stderr, "Explicitly %s feature %s\n",
3287 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3288 break;
3289 }
3290 }
3291 if (i == X86_TUNE_LAST)
3292 error ("unknown parameter to option -mtune-ctrl: %s",
3293 clear ? curr_feature_string - 1 : curr_feature_string);
3294 curr_feature_string = next_feature_string;
3295 }
3296 while (curr_feature_string);
3297 free (orig);
3298 }
3299
3300 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3301 processor type. */
3302
3303 static void
3304 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3305 {
3306 unsigned int ix86_tune_mask = 1u << ix86_tune;
3307 int i;
3308
3309 for (i = 0; i < X86_TUNE_LAST; ++i)
3310 {
3311 if (ix86_tune_no_default)
3312 ix86_tune_features[i] = 0;
3313 else
3314 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3315 }
3316
3317 if (dump)
3318 {
3319 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3320 for (i = 0; i < X86_TUNE_LAST; i++)
3321 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3322 ix86_tune_features[i] ? "on" : "off");
3323 }
3324
3325 parse_mtune_ctrl_str (dump);
3326 }
3327
3328
3329 /* Default align_* from the processor table. */
3330
3331 static void
3332 ix86_default_align (struct gcc_options *opts)
3333 {
3334 if (opts->x_align_loops == 0)
3335 {
3336 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3337 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3338 }
3339 if (opts->x_align_jumps == 0)
3340 {
3341 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3342 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3343 }
3344 if (opts->x_align_functions == 0)
3345 {
3346 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3347 }
3348 }
3349
3350 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3351
3352 static void
3353 ix86_override_options_after_change (void)
3354 {
3355 ix86_default_align (&global_options);
3356 }
3357
3358 /* Override various settings based on options. If MAIN_ARGS_P, the
3359 options are from the command line, otherwise they are from
3360 attributes. Return true if there's an error related to the
3361 -march option. */
3362
3363 static bool
3364 ix86_option_override_internal (bool main_args_p,
3365 struct gcc_options *opts,
3366 struct gcc_options *opts_set)
3367 {
3368 int i;
3369 unsigned int ix86_arch_mask;
3370 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3371
3372 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3373 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3374 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3375 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3376 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3377 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3378 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3379 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3380 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3381 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3382 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3383 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3384 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3385 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3386 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3387 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3388 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3389 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3390 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3391 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3392 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3393 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3394 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3395 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3396 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3397 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3398 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3399 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3400 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3401 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3402 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3403 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3404 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3405 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3406 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3407 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3408 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3409 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3410 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3411 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3412 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3413 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3414 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3415 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3416 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3417 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3418 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3419 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3420 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3421 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3422 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3423 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3424 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3425 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3426 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3427 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3428 #define PTA_MWAITX (HOST_WIDE_INT_1 << 56)
3429 #define PTA_CLZERO (HOST_WIDE_INT_1 << 57)
3430 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 58)
3431 #define PTA_PKU (HOST_WIDE_INT_1 << 59)
3432 #define PTA_AVX5124VNNIW (HOST_WIDE_INT_1 << 60)
3433 #define PTA_AVX5124FMAPS (HOST_WIDE_INT_1 << 61)
3434 #define PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1 << 62)
3435 #define PTA_SGX (HOST_WIDE_INT_1 << 63)
3436
3437 #define PTA_CORE2 \
3438 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3439 | PTA_CX16 | PTA_FXSR)
3440 #define PTA_NEHALEM \
3441 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3442 #define PTA_WESTMERE \
3443 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3444 #define PTA_SANDYBRIDGE \
3445 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3446 #define PTA_IVYBRIDGE \
3447 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3448 #define PTA_HASWELL \
3449 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3450 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3451 #define PTA_BROADWELL \
3452 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3453 #define PTA_SKYLAKE \
3454 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
3455 #define PTA_SKYLAKE_AVX512 \
3456 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
3457 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU | PTA_CLWB)
3458 #define PTA_CANNONLAKE \
3459 (PTA_SKYLAKE_AVX512 | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA)
3460 #define PTA_KNL \
3461 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3462 #define PTA_BONNELL \
3463 (PTA_CORE2 | PTA_MOVBE)
3464 #define PTA_SILVERMONT \
3465 (PTA_WESTMERE | PTA_MOVBE)
3466 #define PTA_KNM \
3467 (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)
3468
3469 /* If this reaches 64, we need to widen the struct pta flags below. */
3470
3471 static struct pta
3472 {
3473 const char *const name; /* processor name or nickname. */
3474 const enum processor_type processor;
3475 const enum attr_cpu schedule;
3476 const unsigned HOST_WIDE_INT flags;
3477 }
3478 const processor_alias_table[] =
3479 {
3480 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3481 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3482 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3483 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3484 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
3485 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3486 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3487 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3488 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3489 {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3490 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3491 PTA_MMX | PTA_SSE | PTA_FXSR},
3492 {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3493 PTA_MMX | PTA_SSE | PTA_FXSR},
3494 {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3495 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3496 {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3497 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3498 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3499 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3500 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3501 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3502 PTA_MMX | PTA_SSE | PTA_FXSR},
3503 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3504 PTA_MMX | PTA_SSE | PTA_FXSR},
3505 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3506 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3507 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3508 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3509 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3510 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3511 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3512 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3513 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3514 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3515 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3516 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3517 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3518 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3519 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3520 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3521 PTA_SANDYBRIDGE},
3522 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3523 PTA_SANDYBRIDGE},
3524 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3525 PTA_IVYBRIDGE},
3526 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3527 PTA_IVYBRIDGE},
3528 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3529 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3530 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
3531 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
3532 {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL,
3533 PTA_SKYLAKE_AVX512},
3534 {"cannonlake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_CANNONLAKE},
3535 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3536 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3537 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3538 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3539 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3540 {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
3541 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3542 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3544 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3545 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3546 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3547 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3548 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3549 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3550 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3551 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3552 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3553 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3554 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3555 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3556 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3557 {"x86-64", PROCESSOR_K8, CPU_K8,
3558 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3559 {"eden-x2", PROCESSOR_K8, CPU_K8,
3560 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3561 {"nano", PROCESSOR_K8, CPU_K8,
3562 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3563 | PTA_SSSE3 | PTA_FXSR},
3564 {"nano-1000", PROCESSOR_K8, CPU_K8,
3565 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3566 | PTA_SSSE3 | PTA_FXSR},
3567 {"nano-2000", PROCESSOR_K8, CPU_K8,
3568 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3569 | PTA_SSSE3 | PTA_FXSR},
3570 {"nano-3000", PROCESSOR_K8, CPU_K8,
3571 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3572 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3573 {"nano-x2", PROCESSOR_K8, CPU_K8,
3574 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3575 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3576 {"eden-x4", PROCESSOR_K8, CPU_K8,
3577 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3578 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3579 {"nano-x4", PROCESSOR_K8, CPU_K8,
3580 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3581 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3582 {"k8", PROCESSOR_K8, CPU_K8,
3583 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3584 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3585 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3586 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3587 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3588 {"opteron", PROCESSOR_K8, CPU_K8,
3589 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3590 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3591 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3592 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3593 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3594 {"athlon64", PROCESSOR_K8, CPU_K8,
3595 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3596 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3597 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3598 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3599 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3600 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3601 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3602 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3603 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3604 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3605 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3606 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3607 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3608 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3609 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3610 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3611 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3612 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3613 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3614 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3615 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3616 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3617 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3618 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3619 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3620 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3621 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3622 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3623 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3624 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3625 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3626 | PTA_XSAVEOPT | PTA_FSGSBASE},
3627 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3628 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3629 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3630 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3631 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3632 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3633 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3634 | PTA_MOVBE | PTA_MWAITX},
3635 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
3636 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3637 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3638 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3639 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
3640 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
3641 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
3642 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
3643 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
3644 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3645 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3646 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3647 | PTA_FXSR | PTA_XSAVE},
3648 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3649 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3650 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3651 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3652 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3653 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3654
3655 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3656 PTA_64BIT
3657 | PTA_HLE /* flags are only used for -march switch. */ },
3658 };
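/* Each entry above maps an -march=/-mtune= CPU name to its scheduling model
   and a set of PTA_* capability bits.  ix86_option_override_internal below
   walks this table and turns the PTA_* bits of the selected entry into
   OPTION_MASK_ISA_* flags, so for example -march=bdver2 implicitly enables
   AVX, FMA, XOP and the other features listed in its entry.  */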
3659
3660 /* -mrecip options. */
3661 static struct
3662 {
3663 const char *string; /* option name */
3664 unsigned int mask; /* mask bits to set */
3665 }
3666 const recip_options[] =
3667 {
3668 { "all", RECIP_MASK_ALL },
3669 { "none", RECIP_MASK_NONE },
3670 { "div", RECIP_MASK_DIV },
3671 { "sqrt", RECIP_MASK_SQRT },
3672 { "vec-div", RECIP_MASK_VEC_DIV },
3673 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3674 };
3675
3676 int const pta_size = ARRAY_SIZE (processor_alias_table);
3677
3678 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3679 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3680 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3681 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3682 #ifdef TARGET_BI_ARCH
3683 else
3684 {
3685 #if TARGET_BI_ARCH == 1
3686 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3687 is on and OPTION_MASK_ABI_X32 is off. We turn off
3688 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3689 -mx32. */
3690 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3691 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3692 #else
3693 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3694 on and OPTION_MASK_ABI_64 is off. We turn off
3695 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3696 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3697 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3698 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3699 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3700 #endif
3701 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3702 && TARGET_IAMCU_P (opts->x_target_flags))
3703 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3704 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3705 }
3706 #endif
3707
3708 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3709 {
3710 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3711 OPTION_MASK_ABI_64 for TARGET_X32. */
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3713 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3714 }
3715 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3716 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3717 | OPTION_MASK_ABI_X32
3718 | OPTION_MASK_ABI_64);
3719 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3720 {
3721 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3722 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3723 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3724 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3725 }
3726
3727 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3728 SUBTARGET_OVERRIDE_OPTIONS;
3729 #endif
3730
3731 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3732 SUBSUBTARGET_OVERRIDE_OPTIONS;
3733 #endif
3734
3735 /* -fPIC is the default for x86_64. */
3736 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3737 opts->x_flag_pic = 2;
3738
3739 /* Need to check -mtune=generic first. */
3740 if (opts->x_ix86_tune_string)
3741 {
3742 /* As special support for cross compilers we read -mtune=native
3743 as -mtune=generic. With native compilers we won't see the
3744 -mtune=native, as it was changed by the driver. */
3745 if (!strcmp (opts->x_ix86_tune_string, "native"))
3746 {
3747 opts->x_ix86_tune_string = "generic";
3748 }
3749 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3750 warning (OPT_Wdeprecated,
3751 main_args_p
3752 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3753 "or %<-mtune=generic%> instead as appropriate")
3754 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3755 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3756 " instead as appropriate"));
3757 }
3758 else
3759 {
3760 if (opts->x_ix86_arch_string)
3761 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3762 if (!opts->x_ix86_tune_string)
3763 {
3764 opts->x_ix86_tune_string
3765 = processor_target_table[TARGET_CPU_DEFAULT].name;
3766 ix86_tune_defaulted = 1;
3767 }
3768
3769 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3770 or defaulted. We need to use a sensible tune option. */
3771 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3772 {
3773 opts->x_ix86_tune_string = "generic";
3774 }
3775 }
3776
3777 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3778 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3779 {
3780 /* rep; movq isn't available in 32-bit code. */
3781 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3782 opts->x_ix86_stringop_alg = no_stringop;
3783 }
3784
3785 if (!opts->x_ix86_arch_string)
3786 opts->x_ix86_arch_string
3787 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3788 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3789 else
3790 ix86_arch_specified = 1;
3791
3792 if (opts_set->x_ix86_pmode)
3793 {
3794 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3795 && opts->x_ix86_pmode == PMODE_SI)
3796 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3797 && opts->x_ix86_pmode == PMODE_DI))
3798 error ("address mode %qs not supported in the %s bit mode",
3799 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3800 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3801 }
3802 else
3803 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3804 ? PMODE_DI : PMODE_SI;
3805
3806 if (!opts_set->x_ix86_abi)
3807 opts->x_ix86_abi = DEFAULT_ABI;
3808
3809 if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
3810 error ("-mabi=ms not supported with X32 ABI");
3811 gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
3812
3813 /* For targets using the MS ABI, enable MS extensions if they are not
3814 explicitly turned off. For non-MS ABIs we turn this
3815 option off. */
3816 if (!opts_set->x_flag_ms_extensions)
3817 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3818
3819 if (opts_set->x_ix86_cmodel)
3820 {
3821 switch (opts->x_ix86_cmodel)
3822 {
3823 case CM_SMALL:
3824 case CM_SMALL_PIC:
3825 if (opts->x_flag_pic)
3826 opts->x_ix86_cmodel = CM_SMALL_PIC;
3827 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3828 error ("code model %qs not supported in the %s bit mode",
3829 "small", "32");
3830 break;
3831
3832 case CM_MEDIUM:
3833 case CM_MEDIUM_PIC:
3834 if (opts->x_flag_pic)
3835 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3836 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3837 error ("code model %qs not supported in the %s bit mode",
3838 "medium", "32");
3839 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3840 error ("code model %qs not supported in x32 mode",
3841 "medium");
3842 break;
3843
3844 case CM_LARGE:
3845 case CM_LARGE_PIC:
3846 if (opts->x_flag_pic)
3847 opts->x_ix86_cmodel = CM_LARGE_PIC;
3848 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3849 error ("code model %qs not supported in the %s bit mode",
3850 "large", "32");
3851 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3852 error ("code model %qs not supported in x32 mode",
3853 "large");
3854 break;
3855
3856 case CM_32:
3857 if (opts->x_flag_pic)
3858 error ("code model %s does not support PIC mode", "32");
3859 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3860 error ("code model %qs not supported in the %s bit mode",
3861 "32", "64");
3862 break;
3863
3864 case CM_KERNEL:
3865 if (opts->x_flag_pic)
3866 {
3867 error ("code model %s does not support PIC mode", "kernel");
3868 opts->x_ix86_cmodel = CM_32;
3869 }
3870 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3871 error ("code model %qs not supported in the %s bit mode",
3872 "kernel", "32");
3873 break;
3874
3875 default:
3876 gcc_unreachable ();
3877 }
3878 }
3879 else
3880 {
3881 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3882 use of rip-relative addressing. This eliminates fixups that
3883 would otherwise be needed if this object is to be placed in a
3884 DLL, and is essentially just as efficient as direct addressing. */
3885 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3886 && (TARGET_RDOS || TARGET_PECOFF))
3887 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3888 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3889 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3890 else
3891 opts->x_ix86_cmodel = CM_32;
3892 }
3893 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3894 {
3895 error ("-masm=intel not supported in this configuration");
3896 opts->x_ix86_asm_dialect = ASM_ATT;
3897 }
3898 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3899 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3900 sorry ("%i-bit mode not compiled in",
3901 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3902
3903 for (i = 0; i < pta_size; i++)
3904 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3905 {
3906 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3907 {
3908 error (main_args_p
3909 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3910 "switch")
3911 : G_("%<generic%> CPU can be used only for "
3912 "%<target(\"tune=\")%> attribute"));
3913 return false;
3914 }
3915 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3916 {
3917 error (main_args_p
3918 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3919 "switch")
3920 : G_("%<intel%> CPU can be used only for "
3921 "%<target(\"tune=\")%> attribute"));
3922 return false;
3923 }
3924
3925 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3926 && !(processor_alias_table[i].flags & PTA_64BIT))
3927 {
3928 error ("CPU you selected does not support x86-64 "
3929 "instruction set");
3930 return false;
3931 }
3932
3933 ix86_schedule = processor_alias_table[i].schedule;
3934 ix86_arch = processor_alias_table[i].processor;
3935 /* Default cpu tuning to the architecture. */
3936 ix86_tune = ix86_arch;
3937
3938 if (processor_alias_table[i].flags & PTA_MMX
3939 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3940 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3941 if (processor_alias_table[i].flags & PTA_3DNOW
3942 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3943 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3944 if (processor_alias_table[i].flags & PTA_3DNOW_A
3945 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3946 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3947 if (processor_alias_table[i].flags & PTA_SSE
3948 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3949 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3950 if (processor_alias_table[i].flags & PTA_SSE2
3951 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3952 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3953 if (processor_alias_table[i].flags & PTA_SSE3
3954 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3955 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3956 if (processor_alias_table[i].flags & PTA_SSSE3
3957 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3958 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3959 if (processor_alias_table[i].flags & PTA_SSE4_1
3960 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3961 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3962 if (processor_alias_table[i].flags & PTA_SSE4_2
3963 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3964 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3965 if (processor_alias_table[i].flags & PTA_AVX
3966 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3967 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3968 if (processor_alias_table[i].flags & PTA_AVX2
3969 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3970 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3971 if (processor_alias_table[i].flags & PTA_FMA
3972 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3973 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3974 if (processor_alias_table[i].flags & PTA_SSE4A
3975 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3976 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3977 if (processor_alias_table[i].flags & PTA_FMA4
3978 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3979 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3980 if (processor_alias_table[i].flags & PTA_XOP
3981 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3982 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3983 if (processor_alias_table[i].flags & PTA_LWP
3984 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3985 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3986 if (processor_alias_table[i].flags & PTA_ABM
3987 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3988 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3989 if (processor_alias_table[i].flags & PTA_BMI
3990 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3991 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3992 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3993 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3994 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3995 if (processor_alias_table[i].flags & PTA_TBM
3996 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3997 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3998 if (processor_alias_table[i].flags & PTA_BMI2
3999 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4000 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4001 if (processor_alias_table[i].flags & PTA_CX16
4002 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16))
4003 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16;
4004 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
4005 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4006 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4007 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4008 && (processor_alias_table[i].flags & PTA_NO_SAHF))
4009 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4010 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4011 if (processor_alias_table[i].flags & PTA_MOVBE
4012 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
4013 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4014 if (processor_alias_table[i].flags & PTA_AES
4015 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4016 ix86_isa_flags |= OPTION_MASK_ISA_AES;
4017 if (processor_alias_table[i].flags & PTA_SHA
4018 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4019 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4020 if (processor_alias_table[i].flags & PTA_PCLMUL
4021 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4022 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4023 if (processor_alias_table[i].flags & PTA_FSGSBASE
4024 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4025 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4026 if (processor_alias_table[i].flags & PTA_RDRND
4027 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4028 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4029 if (processor_alias_table[i].flags & PTA_F16C
4030 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4031 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4032 if (processor_alias_table[i].flags & PTA_RTM
4033 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4034 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4035 if (processor_alias_table[i].flags & PTA_HLE
4036 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4037 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4038 if (processor_alias_table[i].flags & PTA_PRFCHW
4039 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4040 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4041 if (processor_alias_table[i].flags & PTA_RDSEED
4042 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4043 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4044 if (processor_alias_table[i].flags & PTA_ADX
4045 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4046 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4047 if (processor_alias_table[i].flags & PTA_FXSR
4048 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4049 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4050 if (processor_alias_table[i].flags & PTA_XSAVE
4051 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4052 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4053 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4054 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4055 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4056 if (processor_alias_table[i].flags & PTA_AVX512F
4057 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4058 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4059 if (processor_alias_table[i].flags & PTA_AVX512ER
4060 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4061 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4062 if (processor_alias_table[i].flags & PTA_AVX512PF
4063 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4064 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4065 if (processor_alias_table[i].flags & PTA_AVX512CD
4066 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4067 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4068 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4069 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4070 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4071 if (processor_alias_table[i].flags & PTA_CLWB
4072 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4073 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4074 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4075 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4076 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4077 if (processor_alias_table[i].flags & PTA_CLZERO
4078 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4079 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4080 if (processor_alias_table[i].flags & PTA_XSAVEC
4081 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4082 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4083 if (processor_alias_table[i].flags & PTA_XSAVES
4084 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4085 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4086 if (processor_alias_table[i].flags & PTA_AVX512DQ
4087 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4088 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4089 if (processor_alias_table[i].flags & PTA_AVX512BW
4090 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4091 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4092 if (processor_alias_table[i].flags & PTA_AVX512VL
4093 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4094 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4095 if (processor_alias_table[i].flags & PTA_MPX
4096 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MPX))
4097 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MPX;
4098 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4099 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4100 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4101 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4102 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4103 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4104
4105 if (processor_alias_table[i].flags & PTA_AVX5124VNNIW
4106 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124VNNIW))
4107 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
4108 if (processor_alias_table[i].flags & PTA_AVX5124FMAPS
4109 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124FMAPS))
4110 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
4111 if (processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ
4112 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
4113 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
4114 if (processor_alias_table[i].flags & PTA_SGX
4115 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
4116 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
4117
4118 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4119 x86_prefetch_sse = true;
4120 if (processor_alias_table[i].flags & PTA_MWAITX
4121 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4122 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4123 if (processor_alias_table[i].flags & PTA_PKU
4124 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4125 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4126
4127 /* Don't enable x87 instructions if only
4128 general registers are allowed. */
4129 if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
4130 && !(opts_set->x_target_flags & MASK_80387))
4131 {
4132 if (processor_alias_table[i].flags & PTA_NO_80387)
4133 opts->x_target_flags &= ~MASK_80387;
4134 else
4135 opts->x_target_flags |= MASK_80387;
4136 }
4137 break;
4138 }
4139
4140 if (TARGET_X32 && (opts->x_ix86_isa_flags2 & OPTION_MASK_ISA_MPX))
4141 error ("Intel MPX does not support x32");
4142
4143 if (TARGET_X32 && (ix86_isa_flags2 & OPTION_MASK_ISA_MPX))
4144 error ("Intel MPX does not support x32");
4145
4146 if (i == pta_size)
4147 {
4148 error (main_args_p
4149 ? G_("bad value (%qs) for %<-march=%> switch")
4150 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
4151 opts->x_ix86_arch_string);
4152
4153 auto_vec <const char *> candidates;
4154 for (i = 0; i < pta_size; i++)
4155 if (strcmp (processor_alias_table[i].name, "generic")
4156 && strcmp (processor_alias_table[i].name, "intel")
4157 && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4158 || (processor_alias_table[i].flags & PTA_64BIT)))
4159 candidates.safe_push (processor_alias_table[i].name);
4160
4161 char *s;
4162 const char *hint
4163 = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
4164 if (hint)
4165 inform (input_location,
4166 main_args_p
4167 ? G_("valid arguments to %<-march=%> switch are: "
4168 "%s; did you mean %qs?")
4169 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
4170 "%s; did you mean %qs?"), s, hint);
4171 else
4172 inform (input_location,
4173 main_args_p
4174 ? G_("valid arguments to %<-march=%> switch are: %s")
4175 : G_("valid arguments to %<target(\"arch=\")%> attribute "
4176 "are: %s"), s);
4177 XDELETEVEC (s);
4178 }
4179
4180 ix86_arch_mask = 1u << ix86_arch;
4181 for (i = 0; i < X86_ARCH_LAST; ++i)
4182 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4183
4184 for (i = 0; i < pta_size; i++)
4185 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4186 {
4187 ix86_schedule = processor_alias_table[i].schedule;
4188 ix86_tune = processor_alias_table[i].processor;
4189 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4190 {
4191 if (!(processor_alias_table[i].flags & PTA_64BIT))
4192 {
4193 if (ix86_tune_defaulted)
4194 {
4195 opts->x_ix86_tune_string = "x86-64";
4196 for (i = 0; i < pta_size; i++)
4197 if (! strcmp (opts->x_ix86_tune_string,
4198 processor_alias_table[i].name))
4199 break;
4200 ix86_schedule = processor_alias_table[i].schedule;
4201 ix86_tune = processor_alias_table[i].processor;
4202 }
4203 else
4204 error ("CPU you selected does not support x86-64 "
4205 "instruction set");
4206 }
4207 }
4208 /* Intel CPUs have always interpreted SSE prefetch instructions as
4209 NOPs; so, we can enable SSE prefetch instructions even when
4210 -mtune (rather than -march) points us to a processor that has them.
4211 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4212 higher processors. */
4213 if (TARGET_CMOV
4214 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
4215 x86_prefetch_sse = true;
4216 break;
4217 }
4218
4219 if (ix86_tune_specified && i == pta_size)
4220 {
4221 error (main_args_p
4222 ? G_("bad value (%qs) for %<-mtune=%> switch")
4223 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4224 opts->x_ix86_tune_string);
4225
4226 auto_vec <const char *> candidates;
4227 for (i = 0; i < pta_size; i++)
4228 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4229 || (processor_alias_table[i].flags & PTA_64BIT))
4230 candidates.safe_push (processor_alias_table[i].name);
4231
4232 char *s;
4233 const char *hint
4234 = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
4235 if (hint)
4236 inform (input_location,
4237 main_args_p
4238 ? G_("valid arguments to %<-mtune=%> switch are: "
4239 "%s; did you mean %qs?")
4240 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4241 "%s; did you mean %qs?"), s, hint);
4242 else
4243 inform (input_location,
4244 main_args_p
4245 ? G_("valid arguments to %<-mtune=%> switch are: %s")
4246 : G_("valid arguments to %<target(\"tune=\")%> attribute "
4247 "are: %s"), s);
4248 XDELETEVEC (s);
4249 }
4250
4251 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4252
4253 #ifndef USE_IX86_FRAME_POINTER
4254 #define USE_IX86_FRAME_POINTER 0
4255 #endif
4256
4257 #ifndef USE_X86_64_FRAME_POINTER
4258 #define USE_X86_64_FRAME_POINTER 0
4259 #endif
4260
4261 /* Set the default values for switches whose default depends on TARGET_64BIT
4262 in case they weren't overwritten by command line options. */
4263 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4264 {
4265 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4266 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
4267 if (opts->x_flag_asynchronous_unwind_tables
4268 && !opts_set->x_flag_unwind_tables
4269 && TARGET_64BIT_MS_ABI)
4270 opts->x_flag_unwind_tables = 1;
4271 if (opts->x_flag_asynchronous_unwind_tables == 2)
4272 opts->x_flag_unwind_tables
4273 = opts->x_flag_asynchronous_unwind_tables = 1;
4274 if (opts->x_flag_pcc_struct_return == 2)
4275 opts->x_flag_pcc_struct_return = 0;
4276 }
4277 else
4278 {
4279 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4280 opts->x_flag_omit_frame_pointer
4281 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
4282 if (opts->x_flag_asynchronous_unwind_tables == 2)
4283 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
4284 if (opts->x_flag_pcc_struct_return == 2)
4285 {
4286 /* Intel MCU psABI specifies that -freg-struct-return should
4287 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4288 we check -miamcu so that -freg-struct-return is always
4289 turned on if -miamcu is used. */
4290 if (TARGET_IAMCU_P (opts->x_target_flags))
4291 opts->x_flag_pcc_struct_return = 0;
4292 else
4293 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4294 }
4295 }
4296
4297 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4298 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4299 so that for cold code we use size_cost even in !optimize_size compilation. */
4300 if (opts->x_optimize_size)
4301 ix86_cost = &ix86_size_cost;
4302 else
4303 ix86_cost = ix86_tune_cost;
4304
4305 /* Arrange to set up i386_stack_locals for all functions. */
4306 init_machine_status = ix86_init_machine_status;
4307
4308 /* Validate -mregparm= value. */
4309 if (opts_set->x_ix86_regparm)
4310 {
4311 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4312 warning (0, "-mregparm is ignored in 64-bit mode");
4313 else if (TARGET_IAMCU_P (opts->x_target_flags))
4314 warning (0, "-mregparm is ignored for Intel MCU psABI");
4315 if (opts->x_ix86_regparm > REGPARM_MAX)
4316 {
4317 error ("-mregparm=%d is not between 0 and %d",
4318 opts->x_ix86_regparm, REGPARM_MAX);
4319 opts->x_ix86_regparm = 0;
4320 }
4321 }
4322 if (TARGET_IAMCU_P (opts->x_target_flags)
4323 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4324 opts->x_ix86_regparm = REGPARM_MAX;
4325
4326 /* Default align_* from the processor table. */
4327 ix86_default_align (opts);
4328
4329 /* Provide default for -mbranch-cost= value. */
4330 if (!opts_set->x_ix86_branch_cost)
4331 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4332
4333 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4334 {
4335 opts->x_target_flags
4336 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4337
4338 /* Enable by default the SSE and MMX builtins. Do allow the user to
4339 explicitly disable any of these. In particular, disabling SSE and
4340 MMX for kernel code is extremely useful. */
4341 if (!ix86_arch_specified)
4342 opts->x_ix86_isa_flags
4343 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4344 | TARGET_SUBTARGET64_ISA_DEFAULT)
4345 & ~opts->x_ix86_isa_flags_explicit);
4346
4347 if (TARGET_RTD_P (opts->x_target_flags))
4348 warning (0,
4349 main_args_p
4350 ? G_("%<-mrtd%> is ignored in 64bit mode")
4351 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
4352 }
4353 else
4354 {
4355 opts->x_target_flags
4356 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4357
4358 if (!ix86_arch_specified)
4359 opts->x_ix86_isa_flags
4360 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4361
4362 /* The i386 ABI does not specify a red zone. It still makes sense to use
4363 one when the programmer takes care to keep the stack from being clobbered. */
4364 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4365 opts->x_target_flags |= MASK_NO_RED_ZONE;
4366 }
4367
4368 /* Keep nonleaf frame pointers. */
4369 if (opts->x_flag_omit_frame_pointer)
4370 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4371 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4372 opts->x_flag_omit_frame_pointer = 1;
4373
4374 /* If we're doing fast math, we don't care about comparison order
4375 wrt NaNs. This lets us use a shorter comparison sequence. */
4376 if (opts->x_flag_finite_math_only)
4377 opts->x_target_flags &= ~MASK_IEEE_FP;
4378
4379 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4380 since the insns won't need emulation. */
4381 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4382 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4383
4384 /* Likewise, if the target doesn't have a 387, or we've specified
4385 software floating point, don't use 387 inline intrinsics. */
4386 if (!TARGET_80387_P (opts->x_target_flags))
4387 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4388
4389 /* Turn on MMX builtins for -msse. */
4390 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4391 opts->x_ix86_isa_flags
4392 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4393
4394 /* Enable SSE prefetch. */
4395 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4396 || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
4397 && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
4398 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4399 x86_prefetch_sse = true;
4400
4401 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4402 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4403 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4404 opts->x_ix86_isa_flags
4405 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4406
4407 /* Enable lzcnt instruction for -mabm. */
4408 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4409 opts->x_ix86_isa_flags
4410 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4411
4412 /* Disable BMI, BMI2 and TBM instructions for -m16. */
4413 if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4414 opts->x_ix86_isa_flags
4415 &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
4416 & ~opts->x_ix86_isa_flags_explicit);
4417
4418 /* Validate -mpreferred-stack-boundary= value or default it to
4419 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4420 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4421 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4422 {
4423 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4424 int max = TARGET_SEH ? 4 : 12;
4425
4426 if (opts->x_ix86_preferred_stack_boundary_arg < min
4427 || opts->x_ix86_preferred_stack_boundary_arg > max)
4428 {
4429 if (min == max)
4430 error ("-mpreferred-stack-boundary is not supported "
4431 "for this target");
4432 else
4433 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4434 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4435 }
4436 else
4437 ix86_preferred_stack_boundary
4438 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4439 }
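  /* The option argument is the log2 of the alignment in bytes, so for
     example -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
     = 128 bits, i.e. a 16-byte preferred stack boundary.  */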
4440
4441 /* Set the default value for -mstackrealign. */
4442 if (!opts_set->x_ix86_force_align_arg_pointer)
4443 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4444
4445 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4446
4447 /* Validate -mincoming-stack-boundary= value or default it to
4448 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4449 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4450 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4451 {
4452 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4453
4454 if (opts->x_ix86_incoming_stack_boundary_arg < min
4455 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4456 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4457 opts->x_ix86_incoming_stack_boundary_arg, min);
4458 else
4459 {
4460 ix86_user_incoming_stack_boundary
4461 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4462 ix86_incoming_stack_boundary
4463 = ix86_user_incoming_stack_boundary;
4464 }
4465 }
4466
4467 #ifndef NO_PROFILE_COUNTERS
4468 if (flag_nop_mcount)
4469 error ("-mnop-mcount is not compatible with this target");
4470 #endif
4471 if (flag_nop_mcount && flag_pic)
4472 error ("-mnop-mcount is not implemented for -fPIC");
4473
4474 /* Accept -msseregparm only if at least SSE support is enabled. */
4475 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4476 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4477 error (main_args_p
4478 ? G_("%<-msseregparm%> used without SSE enabled")
4479 : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
4480
4481 if (opts_set->x_ix86_fpmath)
4482 {
4483 if (opts->x_ix86_fpmath & FPMATH_SSE)
4484 {
4485 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4486 {
4487 if (TARGET_80387_P (opts->x_target_flags))
4488 {
4489 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4490 opts->x_ix86_fpmath = FPMATH_387;
4491 }
4492 }
4493 else if ((opts->x_ix86_fpmath & FPMATH_387)
4494 && !TARGET_80387_P (opts->x_target_flags))
4495 {
4496 warning (0, "387 instruction set disabled, using SSE arithmetics");
4497 opts->x_ix86_fpmath = FPMATH_SSE;
4498 }
4499 }
4500 }
4501 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4502 -mfpmath=387. The latter is, however, the default on many targets since
4503 the extra 80-bit precision of temporaries is considered part of the ABI.
4504 Overwrite the default at least for -ffast-math.
4505 TODO: -mfpmath=both seems to produce equally performing code with
4506 slightly smaller binaries. It is however not clear if register
4507 allocation is ready for this setting.
4508 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4509 codegen. We may switch to 387 with -ffast-math for size-optimized
4510 functions. */
4511 else if (fast_math_flags_set_p (&global_options)
4512 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4513 opts->x_ix86_fpmath = FPMATH_SSE;
4514 else
4515 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4516
4517 /* Use external vectorized library in vectorizing intrinsics. */
4518 if (opts_set->x_ix86_veclibabi_type)
4519 switch (opts->x_ix86_veclibabi_type)
4520 {
4521 case ix86_veclibabi_type_svml:
4522 ix86_veclib_handler = ix86_veclibabi_svml;
4523 break;
4524
4525 case ix86_veclibabi_type_acml:
4526 ix86_veclib_handler = ix86_veclibabi_acml;
4527 break;
4528
4529 default:
4530 gcc_unreachable ();
4531 }
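  /* With -mveclibabi=svml or -mveclibabi=acml the vectorizer can replace
     calls to scalar math functions with calls into the corresponding
     external vector math library; ix86_veclib_handler is consulted later
     whenever such a vectorized variant is requested.  */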
4532
4533 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4534 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4535 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4536
4537 /* If stack probes are required, the space used for large function
4538 arguments on the stack must also be probed, so enable
4539 -maccumulate-outgoing-args so this happens in the prologue. */
4540 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4541 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4542 {
4543 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4544 warning (0,
4545 main_args_p
4546 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
4547 "for correctness")
4548 : G_("stack probing requires "
4549 "%<target(\"accumulate-outgoing-args\")%> for "
4550 "correctness"));
4551 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4552 }
4553
4554 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4555 so enable -maccumulate-outgoing-args when %ebp is fixed. */
4556 if (fixed_regs[BP_REG]
4557 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4558 {
4559 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4560 warning (0,
4561 main_args_p
4562 ? G_("fixed ebp register requires "
4563 "%<-maccumulate-outgoing-args%>")
4564 : G_("fixed ebp register requires "
4565 "%<target(\"accumulate-outgoing-args\")%>"));
4566 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4567 }
4568
4569 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4570 {
4571 char *p;
4572 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4573 p = strchr (internal_label_prefix, 'X');
4574 internal_label_prefix_len = p - internal_label_prefix;
4575 *p = '\0';
4576 }
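  /* The exact prefix is target-dependent; on typical ELF targets
     ASM_GENERATE_INTERNAL_LABEL produces something like "*.LX0" here, so
     internal_label_prefix becomes "*.L" with internal_label_prefix_len == 3.  */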
4577
4578 /* When a scheduling description is not available, disable the scheduler
4579 pass so it won't slow down compilation and make x87 code slower. */
4580 if (!TARGET_SCHEDULE)
4581 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4582
4583 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4584 ix86_tune_cost->simultaneous_prefetches,
4585 opts->x_param_values,
4586 opts_set->x_param_values);
4587 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4588 ix86_tune_cost->prefetch_block,
4589 opts->x_param_values,
4590 opts_set->x_param_values);
4591 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4592 ix86_tune_cost->l1_cache_size,
4593 opts->x_param_values,
4594 opts_set->x_param_values);
4595 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4596 ix86_tune_cost->l2_cache_size,
4597 opts->x_param_values,
4598 opts_set->x_param_values);
4599
4600 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4601 if (opts->x_flag_prefetch_loop_arrays < 0
4602 && HAVE_prefetch
4603 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4604 && !opts->x_optimize_size
4605 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4606 opts->x_flag_prefetch_loop_arrays = 1;
4607
4608 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4609 can be optimized to ap = __builtin_next_arg (0). */
4610 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4611 targetm.expand_builtin_va_start = NULL;
4612
4613 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4614 {
4615 ix86_gen_leave = gen_leave_rex64;
4616 if (Pmode == DImode)
4617 {
4618 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4619 ix86_gen_tls_local_dynamic_base_64
4620 = gen_tls_local_dynamic_base_64_di;
4621 }
4622 else
4623 {
4624 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4625 ix86_gen_tls_local_dynamic_base_64
4626 = gen_tls_local_dynamic_base_64_si;
4627 }
4628 }
4629 else
4630 ix86_gen_leave = gen_leave;
4631
4632 if (Pmode == DImode)
4633 {
4634 ix86_gen_add3 = gen_adddi3;
4635 ix86_gen_sub3 = gen_subdi3;
4636 ix86_gen_sub3_carry = gen_subdi3_carry;
4637 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4638 ix86_gen_andsp = gen_anddi3;
4639 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4640 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4641 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4642 ix86_gen_monitor = gen_sse3_monitor_di;
4643 ix86_gen_monitorx = gen_monitorx_di;
4644 ix86_gen_clzero = gen_clzero_di;
4645 }
4646 else
4647 {
4648 ix86_gen_add3 = gen_addsi3;
4649 ix86_gen_sub3 = gen_subsi3;
4650 ix86_gen_sub3_carry = gen_subsi3_carry;
4651 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4652 ix86_gen_andsp = gen_andsi3;
4653 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4654 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4655 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4656 ix86_gen_monitor = gen_sse3_monitor_si;
4657 ix86_gen_monitorx = gen_monitorx_si;
4658 ix86_gen_clzero = gen_clzero_si;
4659 }
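  /* These ix86_gen_* hooks let the rest of the backend emit the right
     pattern for the current pointer width without checking Pmode at every
     call site: the same ix86_gen_add3 call expands to adddi3 when
     Pmode == DImode (-m64) and to addsi3 when Pmode == SImode (-m32).  */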
4660
4661 #ifdef USE_IX86_CLD
4662 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4663 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4664 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4665 #endif
4666
4667 /* Set the default value for -mfentry. */
4668 if (!opts_set->x_flag_fentry)
4669 opts->x_flag_fentry = TARGET_SEH;
4670 else
4671 {
4672 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
4673 && opts->x_flag_fentry)
4674 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4675 "with -fpic");
4676 else if (TARGET_SEH && !opts->x_flag_fentry)
4677 sorry ("-mno-fentry isn%'t compatible with SEH");
4678 }
4679
4680 if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
4681 sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
4682
4683 if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
4684 && TARGET_EMIT_VZEROUPPER)
4685 opts->x_target_flags |= MASK_VZEROUPPER;
4686 if (!(opts_set->x_target_flags & MASK_STV))
4687 opts->x_target_flags |= MASK_STV;
4688 /* Disable STV if -mpreferred-stack-boundary={2,3} or
4689 -mincoming-stack-boundary={2,3} or -mstackrealign is given - the needed
4690 stack realignment would be an extra cost the pass doesn't take into
4691 account, and the pass can't realign the stack itself. */
4692 if (ix86_preferred_stack_boundary < 128
4693 || ix86_incoming_stack_boundary < 128
4694 || opts->x_ix86_force_align_arg_pointer)
4695 opts->x_target_flags &= ~MASK_STV;
4696 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4697 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4698 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4699 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4700 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4701 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4702
4703 /* Enable 128-bit AVX instruction generation
4704 for the auto-vectorizer. */
4705 if (TARGET_AVX128_OPTIMAL
4706 && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4707 opts->x_prefer_vector_width_type = PVW_AVX128;
4708
4709 /* Use 256-bit AVX instruction generation
4710 in the auto-vectorizer. */
4711 if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL]
4712 && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4713 opts->x_prefer_vector_width_type = PVW_AVX256;
4714
4715 if (opts->x_ix86_recip_name)
4716 {
4717 char *p = ASTRDUP (opts->x_ix86_recip_name);
4718 char *q;
4719 unsigned int mask, i;
4720 bool invert;
4721
4722 while ((q = strtok (p, ",")) != NULL)
4723 {
4724 p = NULL;
4725 if (*q == '!')
4726 {
4727 invert = true;
4728 q++;
4729 }
4730 else
4731 invert = false;
4732
4733 if (!strcmp (q, "default"))
4734 mask = RECIP_MASK_ALL;
4735 else
4736 {
4737 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4738 if (!strcmp (q, recip_options[i].string))
4739 {
4740 mask = recip_options[i].mask;
4741 break;
4742 }
4743
4744 if (i == ARRAY_SIZE (recip_options))
4745 {
4746 error ("unknown option for -mrecip=%s", q);
4747 invert = false;
4748 mask = RECIP_MASK_NONE;
4749 }
4750 }
4751
4752 opts->x_recip_mask_explicit |= mask;
4753 if (invert)
4754 opts->x_recip_mask &= ~mask;
4755 else
4756 opts->x_recip_mask |= mask;
4757 }
4758 }
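  /* For example, -mrecip=all,!sqrt first sets every bit in RECIP_MASK_ALL
     and then clears RECIP_MASK_SQRT again, while -mrecip=vec-div enables
     only the vectorized division approximation.  Unknown keywords are
     diagnosed by the error above.  */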
4759
4760 if (TARGET_RECIP_P (opts->x_target_flags))
4761 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4762 else if (opts_set->x_target_flags & MASK_RECIP)
4763 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4764
4765 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4766 for 64-bit Bionic. Also default long double to 64-bit for Intel
4767 MCU psABI. */
4768 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4769 && !(opts_set->x_target_flags
4770 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4771 opts->x_target_flags |= (TARGET_64BIT
4772 ? MASK_LONG_DOUBLE_128
4773 : MASK_LONG_DOUBLE_64);
4774
4775 /* Only one of them can be active. */
4776 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4777 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4778
4779 /* Handle stack protector */
4780 if (!opts_set->x_ix86_stack_protector_guard)
4781 opts->x_ix86_stack_protector_guard
4782 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4783
4784 #ifdef TARGET_THREAD_SSP_OFFSET
4785 ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4786 #endif
4787
4788 if (global_options_set.x_ix86_stack_protector_guard_offset_str)
4789 {
4790 char *endp;
4791 const char *str = ix86_stack_protector_guard_offset_str;
4792
4793 errno = 0;
4794 int64_t offset;
4795
4796 #if defined(INT64_T_IS_LONG)
4797 offset = strtol (str, &endp, 0);
4798 #else
4799 offset = strtoll (str, &endp, 0);
4800 #endif
4801
4802 if (!*str || *endp || errno)
4803 error ("%qs is not a valid number "
4804 "in -mstack-protector-guard-offset=", str);
4805
4806 if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
4807 HOST_WIDE_INT_C (0x7fffffff)))
4808 error ("%qs is not a valid offset "
4809 "in -mstack-protector-guard-offset=", str);
4810
4811 ix86_stack_protector_guard_offset = offset;
4812 }
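  /* A typical use is a small TLS offset such as
     -mstack-protector-guard-offset=0x28 (for instance, when matching a
     kernel's per-CPU canary layout); the value is parsed with
     strtol/strtoll above and must fit in a 32-bit signed displacement.  */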
4813
4814 ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
4815
4816 /* The kernel uses a different segment register for performance
4817 reasons; a system call would not have to trash the userspace
4818 segment register, which would be expensive. */
4819 if (ix86_cmodel == CM_KERNEL)
4820 ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
4821
4822 if (global_options_set.x_ix86_stack_protector_guard_reg_str)
4823 {
4824 const char *str = ix86_stack_protector_guard_reg_str;
4825 addr_space_t seg = ADDR_SPACE_GENERIC;
4826
4827 /* Discard optional register prefix. */
4828 if (str[0] == '%')
4829 str++;
4830
4831 if (strlen (str) == 2 && str[1] == 's')
4832 {
4833 if (str[0] == 'f')
4834 seg = ADDR_SPACE_SEG_FS;
4835 else if (str[0] == 'g')
4836 seg = ADDR_SPACE_SEG_GS;
4837 }
4838
4839 if (seg == ADDR_SPACE_GENERIC)
4840 error ("%qs is not a valid base register "
4841 "in -mstack-protector-guard-reg=",
4842 ix86_stack_protector_guard_reg_str);
4843
4844 ix86_stack_protector_guard_reg = seg;
4845 }
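  /* Only "fs" and "gs" (optionally written as "%fs"/"%gs") are accepted,
     so e.g. -mstack-protector-guard-reg=gs overrides the
     DEFAULT_TLS_SEG_REG / kernel default chosen above.  */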
4846
4847 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4848 if (opts->x_ix86_tune_memcpy_strategy)
4849 {
4850 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4851 ix86_parse_stringop_strategy_string (str, false);
4852 free (str);
4853 }
4854
4855 if (opts->x_ix86_tune_memset_strategy)
4856 {
4857 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4858 ix86_parse_stringop_strategy_string (str, true);
4859 free (str);
4860 }
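  /* Both strategy strings use the alg:max_size:dest_align triplet syntax
     parsed by ix86_parse_stringop_strategy_string, e.g.
     -mmemcpy-strategy=rep_8byte:256:noalign,libcall:-1:noalign uses inline
     "rep movsq" for copies up to 256 bytes and a libcall otherwise
     (a max_size of -1 covers all remaining sizes).  */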
4861
4862 /* Save the initial options in case the user uses function-specific
4863 options later. */
4864 if (main_args_p)
4865 target_option_default_node = target_option_current_node
4866 = build_target_option_node (opts);
4867
4868 /* Do not support control flow instrumentation if CET is not enabled. */
4869 if (opts->x_flag_cf_protection != CF_NONE)
4870 {
4871 if (!(TARGET_IBT_P (opts->x_ix86_isa_flags2)
4872 || TARGET_SHSTK_P (opts->x_ix86_isa_flags2)))
4873 {
4874 if (flag_cf_protection == CF_FULL)
4875 {
4876 error ("%<-fcf-protection=full%> requires CET support "
4877 "on this target. Use -mcet or one of -mibt, "
4878 "-mshstk options to enable CET");
4879 }
4880 else if (flag_cf_protection == CF_BRANCH)
4881 {
4882 error ("%<-fcf-protection=branch%> requires CET support "
4883 "on this target. Use -mcet or one of -mibt, "
4884 "-mshstk options to enable CET");
4885 }
4886 else if (flag_cf_protection == CF_RETURN)
4887 {
4888 error ("%<-fcf-protection=return%> requires CET support "
4889 "on this target. Use -mcet or one of -mibt, "
4890 "-mshstk options to enable CET");
4891 }
4892 flag_cf_protection = CF_NONE;
4893 return false;
4894 }
4895 opts->x_flag_cf_protection =
4896 (cf_protection_level) (opts->x_flag_cf_protection | CF_SET);
4897 }
4898
4899 return true;
4900 }
4901
4902 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4903
4904 static void
4905 ix86_option_override (void)
4906 {
4907 ix86_option_override_internal (true, &global_options, &global_options_set);
4908 }
4909
4910 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4911 static char *
4912 ix86_offload_options (void)
4913 {
4914 if (TARGET_LP64)
4915 return xstrdup ("-foffload-abi=lp64");
4916 return xstrdup ("-foffload-abi=ilp32");
4917 }
4918
4919 /* Update register usage after having seen the compiler flags. */
4920
4921 static void
4922 ix86_conditional_register_usage (void)
4923 {
4924 int i, c_mask;
4925
4926 /* If there are no caller-saved registers, preserve all registers,
4927 except fixed_regs and registers used for the function return value,
4928 since aggregate_value_p checks call_used_regs[regno] on the return
4929 value. */
4930 if (cfun && cfun->machine->no_caller_saved_registers)
4931 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4932 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
4933 call_used_regs[i] = 0;
4934
4935 /* For 32-bit targets, squash the REX registers. */
4936 if (! TARGET_64BIT)
4937 {
4938 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4939 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4940 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4941 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4942 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4943 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4944 }
4945
4946 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4947 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
4948
4949 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4950
4951 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4952 {
4953 /* Set/reset conditionally defined registers from
4954 CALL_USED_REGISTERS initializer. */
4955 if (call_used_regs[i] > 1)
4956 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4957
4958 /* Calculate registers of CLOBBERED_REGS register set
4959 as call used registers from GENERAL_REGS register set. */
4960 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4961 && call_used_regs[i])
4962 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4963 }
4964
4965 /* If MMX is disabled, squash the registers. */
4966 if (! TARGET_MMX)
4967 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4968 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4969 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4970
4971 /* If SSE is disabled, squash the registers. */
4972 if (! TARGET_SSE)
4973 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4974 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4975 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4976
4977 /* If the FPU is disabled, squash the registers. */
4978 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4979 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4980 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4981 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4982
4983 /* If AVX512F is disabled, squash the registers. */
4984 if (! TARGET_AVX512F)
4985 {
4986 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4987 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4988
4989 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4990 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4991 }
4992
4993 /* If MPX is disabled, squash the registers. */
4994 if (! TARGET_MPX)
4995 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4996 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4997 }
4998
4999 /* Canonicalize a comparison from one we don't have to one we do have. */
5000
5001 static void
5002 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5003 bool op0_preserve_value)
5004 {
5005 /* The order of operands in an x87 ficom compare is forced by combine in
5006 the simplify_comparison () function. The FLOAT operator is treated as
5007 RTX_OBJ with precedence over other operators and is always put first.
5008 Swap the condition and operands to match the ficom instruction. */
5009 if (!op0_preserve_value
5010 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
5011 {
5012 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
5013
5014 /* We are called only for compares that are split to SAHF instruction.
5015 Ensure that we have setcc/jcc insn for the swapped condition. */
5016 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
5017 {
5018 std::swap (*op0, *op1);
5019 *code = (int) scode;
5020 }
5021 }
5022 }
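/* As a concrete example of the swap above: a compare written as
   (lt (float (mem)) (reg)) is rewritten to (gt (reg) (float (mem))),
   which is the operand order the ficom patterns expect.  */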
5023 \f
5024 /* Save the current options */
5025
5026 static void
5027 ix86_function_specific_save (struct cl_target_option *ptr,
5028 struct gcc_options *opts)
5029 {
5030 ptr->arch = ix86_arch;
5031 ptr->schedule = ix86_schedule;
5032 ptr->prefetch_sse = x86_prefetch_sse;
5033 ptr->tune = ix86_tune;
5034 ptr->branch_cost = ix86_branch_cost;
5035 ptr->tune_defaulted = ix86_tune_defaulted;
5036 ptr->arch_specified = ix86_arch_specified;
5037 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5038 ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
5039 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5040 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5041 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5042 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5043 ptr->x_ix86_abi = opts->x_ix86_abi;
5044 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5045 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5046 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5047 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5048 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5049 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5050 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5051 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5052 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5053 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5054 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5055 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5056 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5057 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5058 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5059 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5060 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5061 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5062 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5063 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5064
5065 /* The fields are char but the variables are not; make sure the
5066 values fit in the fields. */
5067 gcc_assert (ptr->arch == ix86_arch);
5068 gcc_assert (ptr->schedule == ix86_schedule);
5069 gcc_assert (ptr->tune == ix86_tune);
5070 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5071 }
5072
5073 /* Restore the current options */
5074
5075 static void
5076 ix86_function_specific_restore (struct gcc_options *opts,
5077 struct cl_target_option *ptr)
5078 {
5079 enum processor_type old_tune = ix86_tune;
5080 enum processor_type old_arch = ix86_arch;
5081 unsigned int ix86_arch_mask;
5082 int i;
5083
5084 /* We don't change -fPIC. */
5085 opts->x_flag_pic = flag_pic;
5086
5087 ix86_arch = (enum processor_type) ptr->arch;
5088 ix86_schedule = (enum attr_cpu) ptr->schedule;
5089 ix86_tune = (enum processor_type) ptr->tune;
5090 x86_prefetch_sse = ptr->prefetch_sse;
5091 opts->x_ix86_branch_cost = ptr->branch_cost;
5092 ix86_tune_defaulted = ptr->tune_defaulted;
5093 ix86_arch_specified = ptr->arch_specified;
5094 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5095 opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
5096 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5097 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5098 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5099 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5100 opts->x_ix86_abi = ptr->x_ix86_abi;
5101 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5102 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5103 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5104 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5105 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5106 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5107 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5108 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5109 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5110 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5111 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5112 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5113 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5114 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5115 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5116 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5117 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5118 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5119 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5120 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5121 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5122 /* TODO: ix86_cost should be chosen at instruction or function granularity
5123 so for cold code we use size_cost even in !optimize_size compilation. */
5124 if (opts->x_optimize_size)
5125 ix86_cost = &ix86_size_cost;
5126 else
5127 ix86_cost = ix86_tune_cost;
5128
5129 /* Recreate the arch feature tests if the arch changed */
5130 if (old_arch != ix86_arch)
5131 {
5132 ix86_arch_mask = 1u << ix86_arch;
5133 for (i = 0; i < X86_ARCH_LAST; ++i)
5134 ix86_arch_features[i]
5135 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5136 }
5137
5138 /* Recreate the tune optimization tests */
5139 if (old_tune != ix86_tune)
5140 set_ix86_tune_features (ix86_tune, false);
5141 }
5142
5143 /* Adjust target options after streaming them in. This is mainly about
5144 reconciling them with global options. */
5145
5146 static void
5147 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5148 {
5149 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
5150 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5151 for PIC, or error out. */
5152 if (flag_pic)
5153 switch (ptr->x_ix86_cmodel)
5154 {
5155 case CM_SMALL:
5156 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5157 break;
5158
5159 case CM_MEDIUM:
5160 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5161 break;
5162
5163 case CM_LARGE:
5164 ptr->x_ix86_cmodel = CM_LARGE_PIC;
5165 break;
5166
5167 case CM_KERNEL:
5168 error ("code model %s does not support PIC mode", "kernel");
5169 break;
5170
5171 default:
5172 break;
5173 }
5174 else
5175 switch (ptr->x_ix86_cmodel)
5176 {
5177 case CM_SMALL_PIC:
5178 ptr->x_ix86_cmodel = CM_SMALL;
5179 break;
5180
5181 case CM_MEDIUM_PIC:
5182 ptr->x_ix86_cmodel = CM_MEDIUM;
5183 break;
5184
5185 case CM_LARGE_PIC:
5186 ptr->x_ix86_cmodel = CM_LARGE;
5187 break;
5188
5189 default:
5190 break;
5191 }
5192 }
5193
5194 /* Print the current options */
5195
5196 static void
5197 ix86_function_specific_print (FILE *file, int indent,
5198 struct cl_target_option *ptr)
5199 {
5200 char *target_string
5201 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
5202 ptr->x_target_flags, ptr->x_ix86_target_flags,
5203 NULL, NULL, ptr->x_ix86_fpmath, false);
5204
5205 gcc_assert (ptr->arch < PROCESSOR_max);
5206 fprintf (file, "%*sarch = %d (%s)\n",
5207 indent, "",
5208 ptr->arch, processor_target_table[ptr->arch].name);
5209
5210 gcc_assert (ptr->tune < PROCESSOR_max);
5211 fprintf (file, "%*stune = %d (%s)\n",
5212 indent, "",
5213 ptr->tune, processor_target_table[ptr->tune].name);
5214
5215 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5216
5217 if (target_string)
5218 {
5219 fprintf (file, "%*s%s\n", indent, "", target_string);
5220 free (target_string);
5221 }
5222 }
5223
5224 \f
5225 /* Inner function to process the attribute((target(...))), take an argument and
5226 set the current options from the argument. If we have a list, recursively go
5227 over the list. */
5228
5229 static bool
5230 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5231 struct gcc_options *opts,
5232 struct gcc_options *opts_set,
5233 struct gcc_options *enum_opts_set)
5234 {
5235 char *next_optstr;
5236 bool ret = true;
5237
5238 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5239 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5240 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5241 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5242 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5243
5244 enum ix86_opt_type
5245 {
5246 ix86_opt_unknown,
5247 ix86_opt_yes,
5248 ix86_opt_no,
5249 ix86_opt_str,
5250 ix86_opt_enum,
5251 ix86_opt_isa
5252 };
5253
5254 static const struct
5255 {
5256 const char *string;
5257 size_t len;
5258 enum ix86_opt_type type;
5259 int opt;
5260 int mask;
5261 } attrs[] = {
5262 /* isa options */
5263 IX86_ATTR_ISA ("sgx", OPT_msgx),
5264 IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
5265 IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
5266 IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
5267 IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
5268 IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
5269
5270 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5271 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5272 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5273 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5274 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5275 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5276 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5277 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5278 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5279 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5280 IX86_ATTR_ISA ("fma", OPT_mfma),
5281 IX86_ATTR_ISA ("xop", OPT_mxop),
5282 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5283 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5284 IX86_ATTR_ISA ("avx", OPT_mavx),
5285 IX86_ATTR_ISA ("sse4", OPT_msse4),
5286 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5287 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5288 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5289 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5290 IX86_ATTR_ISA ("sse3", OPT_msse3),
5291 IX86_ATTR_ISA ("aes", OPT_maes),
5292 IX86_ATTR_ISA ("sha", OPT_msha),
5293 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5294 IX86_ATTR_ISA ("sse2", OPT_msse2),
5295 IX86_ATTR_ISA ("sse", OPT_msse),
5296 IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa),
5297 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5298 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5299 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5300 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5301 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5302 IX86_ATTR_ISA ("adx", OPT_madx),
5303 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5304 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5305 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5306 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5307 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5308 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5309 IX86_ATTR_ISA ("abm", OPT_mabm),
5310 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5311 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5312 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5313 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5314 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5315 IX86_ATTR_ISA ("cx16", OPT_mcx16),
5316 IX86_ATTR_ISA ("sahf", OPT_msahf),
5317 IX86_ATTR_ISA ("movbe", OPT_mmovbe),
5318 IX86_ATTR_ISA ("crc32", OPT_mcrc32),
5319 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5320 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5321 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5322 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5323 IX86_ATTR_ISA ("pku", OPT_mpku),
5324 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5325 IX86_ATTR_ISA ("hle", OPT_mhle),
5326 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5327 IX86_ATTR_ISA ("mpx", OPT_mmpx),
5328 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5329 IX86_ATTR_ISA ("rdpid", OPT_mrdpid),
5330 IX86_ATTR_ISA ("gfni", OPT_mgfni),
5331 IX86_ATTR_ISA ("ibt", OPT_mibt),
5332 IX86_ATTR_ISA ("shstk", OPT_mshstk),
5333 IX86_ATTR_ISA ("vaes", OPT_mvaes),
5334 IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
5335
5336 /* enum options */
5337 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5338
5339 /* string options */
5340 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5341 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5342
5343 /* flag options */
5344 IX86_ATTR_YES ("cld",
5345 OPT_mcld,
5346 MASK_CLD),
5347
5348 IX86_ATTR_NO ("fancy-math-387",
5349 OPT_mfancy_math_387,
5350 MASK_NO_FANCY_MATH_387),
5351
5352 IX86_ATTR_YES ("ieee-fp",
5353 OPT_mieee_fp,
5354 MASK_IEEE_FP),
5355
5356 IX86_ATTR_YES ("inline-all-stringops",
5357 OPT_minline_all_stringops,
5358 MASK_INLINE_ALL_STRINGOPS),
5359
5360 IX86_ATTR_YES ("inline-stringops-dynamically",
5361 OPT_minline_stringops_dynamically,
5362 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5363
5364 IX86_ATTR_NO ("align-stringops",
5365 OPT_mno_align_stringops,
5366 MASK_NO_ALIGN_STRINGOPS),
5367
5368 IX86_ATTR_YES ("recip",
5369 OPT_mrecip,
5370 MASK_RECIP),
5371
5372 };
5373
5374 /* If this is a list, recurse to get the options. */
5375 if (TREE_CODE (args) == TREE_LIST)
5376 {
5377 bool ret = true;
5378
5379 for (; args; args = TREE_CHAIN (args))
5380 if (TREE_VALUE (args)
5381 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5382 p_strings, opts, opts_set,
5383 enum_opts_set))
5384 ret = false;
5385
5386 return ret;
5387 }
5388
5389 else if (TREE_CODE (args) != STRING_CST)
5390 {
5391 error ("attribute %<target%> argument not a string");
5392 return false;
5393 }
5394
5395 /* Handle multiple arguments separated by commas. */
5396 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5397
5398 while (next_optstr && *next_optstr != '\0')
5399 {
5400 char *p = next_optstr;
5401 char *orig_p = p;
5402 char *comma = strchr (next_optstr, ',');
5403 const char *opt_string;
5404 size_t len, opt_len;
5405 int opt;
5406 bool opt_set_p;
5407 char ch;
5408 unsigned i;
5409 enum ix86_opt_type type = ix86_opt_unknown;
5410 int mask = 0;
5411
5412 if (comma)
5413 {
5414 *comma = '\0';
5415 len = comma - next_optstr;
5416 next_optstr = comma + 1;
5417 }
5418 else
5419 {
5420 len = strlen (p);
5421 next_optstr = NULL;
5422 }
5423
5424 /* Recognize no-xxx. */
5425 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5426 {
5427 opt_set_p = false;
5428 p += 3;
5429 len -= 3;
5430 }
5431 else
5432 opt_set_p = true;
5433
5434 /* Find the option. */
5435 ch = *p;
5436 opt = N_OPTS;
5437 for (i = 0; i < ARRAY_SIZE (attrs); i++)
5438 {
5439 type = attrs[i].type;
5440 opt_len = attrs[i].len;
5441 if (ch == attrs[i].string[0]
5442 && ((type != ix86_opt_str && type != ix86_opt_enum)
5443 ? len == opt_len
5444 : len > opt_len)
5445 && memcmp (p, attrs[i].string, opt_len) == 0)
5446 {
5447 opt = attrs[i].opt;
5448 mask = attrs[i].mask;
5449 opt_string = attrs[i].string;
5450 break;
5451 }
5452 }
5453
5454 /* Process the option. */
5455 if (opt == N_OPTS)
5456 {
5457 error ("attribute(target(\"%s\")) is unknown", orig_p);
5458 ret = false;
5459 }
5460
5461 else if (type == ix86_opt_isa)
5462 {
5463 struct cl_decoded_option decoded;
5464
5465 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5466 ix86_handle_option (opts, opts_set,
5467 &decoded, input_location);
5468 }
5469
5470 else if (type == ix86_opt_yes || type == ix86_opt_no)
5471 {
5472 if (type == ix86_opt_no)
5473 opt_set_p = !opt_set_p;
5474
5475 if (opt_set_p)
5476 opts->x_target_flags |= mask;
5477 else
5478 opts->x_target_flags &= ~mask;
5479 }
5480
5481 else if (type == ix86_opt_str)
5482 {
5483 if (p_strings[opt])
5484 {
5485 error ("option(\"%s\") was already specified", opt_string);
5486 ret = false;
5487 }
5488 else
5489 p_strings[opt] = xstrdup (p + opt_len);
5490 }
5491
5492 else if (type == ix86_opt_enum)
5493 {
5494 bool arg_ok;
5495 int value;
5496
5497 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5498 if (arg_ok)
5499 set_option (opts, enum_opts_set, opt, value,
5500 p + opt_len, DK_UNSPECIFIED, input_location,
5501 global_dc);
5502 else
5503 {
5504 error ("attribute(target(\"%s\")) is unknown", orig_p);
5505 ret = false;
5506 }
5507 }
5508
5509 else
5510 gcc_unreachable ();
5511 }
5512
5513 return ret;
5514 }
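
/* Illustrative sketch (not part of this file): the strings accepted by
   ix86_valid_target_attribute_inner_p above are comma-separated entries
   from the attrs[] table, optionally negated with a "no-" prefix, plus
   the "arch=", "tune=" and "fpmath=" forms.  For example:

     __attribute__ ((target ("arch=haswell,avx2,no-sse4a,fpmath=sse")))
     void
     scale (float *dst, const float *src, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = src[i] * 2.0f;
     }

   Here "avx2" and "no-sse4a" take the ix86_opt_isa path, "arch=" the
   ix86_opt_str path, and "fpmath=" the ix86_opt_enum path.  */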
5515
5516 /* Release allocated strings. */
5517 static void
5518 release_options_strings (char **option_strings)
5519 {
5520 /* Free up memory allocated to hold the strings */
5521 for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5522 free (option_strings[i]);
5523 }
5524
5525 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5526
5527 tree
5528 ix86_valid_target_attribute_tree (tree args,
5529 struct gcc_options *opts,
5530 struct gcc_options *opts_set)
5531 {
5532 const char *orig_arch_string = opts->x_ix86_arch_string;
5533 const char *orig_tune_string = opts->x_ix86_tune_string;
5534 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5535 int orig_tune_defaulted = ix86_tune_defaulted;
5536 int orig_arch_specified = ix86_arch_specified;
5537 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5538 tree t = NULL_TREE;
5539 struct cl_target_option *def
5540 = TREE_TARGET_OPTION (target_option_default_node);
5541 struct gcc_options enum_opts_set;
5542
5543 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5544
5545 /* Process each of the options on the chain. */
5546 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5547 opts_set, &enum_opts_set))
5548 return error_mark_node;
5549
5550 /* If the changed options are different from the default, rerun
5551 ix86_option_override_internal, and then save the options away.
5552 The string options are attribute options, and will be undone
5553 when we copy the save structure. */
5554 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5555 || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
5556 || opts->x_target_flags != def->x_target_flags
5557 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5558 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5559 || enum_opts_set.x_ix86_fpmath)
5560 {
5561 /* If we are using the default tune= or arch=, undo the string assigned,
5562 and use the default. */
5563 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5564 {
5565 opts->x_ix86_arch_string
5566 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
5567
5568 /* If arch= is set, clear all bits in x_ix86_isa_flags,
5569 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
5570 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
5571 | OPTION_MASK_ABI_64
5572 | OPTION_MASK_ABI_X32
5573 | OPTION_MASK_CODE16);
5574 opts->x_ix86_isa_flags2 = 0;
5575 }
5576 else if (!orig_arch_specified)
5577 opts->x_ix86_arch_string = NULL;
5578
5579 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5580 opts->x_ix86_tune_string
5581 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5582 else if (orig_tune_defaulted)
5583 opts->x_ix86_tune_string = NULL;
5584
5585 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5586 if (enum_opts_set.x_ix86_fpmath)
5587 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5588
5589 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5590 bool r = ix86_option_override_internal (false, opts, opts_set);
5591 if (!r)
5592 {
5593 release_options_strings (option_strings);
5594 return error_mark_node;
5595 }
5596
5597 /* Add any builtin functions with the new isa if any. */
5598 ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);
5599
5600 /* Save the current options unless we are validating options for
5601 #pragma. */
5602 t = build_target_option_node (opts);
5603
5604 opts->x_ix86_arch_string = orig_arch_string;
5605 opts->x_ix86_tune_string = orig_tune_string;
5606 opts_set->x_ix86_fpmath = orig_fpmath_set;
5607
5608 release_options_strings (option_strings);
5609 }
5610
5611 return t;
5612 }
5613
5614 /* Hook to validate attribute((target("string"))). */
5615
5616 static bool
5617 ix86_valid_target_attribute_p (tree fndecl,
5618 tree ARG_UNUSED (name),
5619 tree args,
5620 int ARG_UNUSED (flags))
5621 {
5622 struct gcc_options func_options;
5623 tree new_target, new_optimize;
5624 bool ret = true;
5625
5626 /* attribute((target("default"))) does nothing, beyond
5627 affecting multi-versioning. */
5628 if (TREE_VALUE (args)
5629 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5630 && TREE_CHAIN (args) == NULL_TREE
5631 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5632 return true;
5633
5634 tree old_optimize = build_optimization_node (&global_options);
5635
5636 /* Get the optimization options of the current function. */
5637 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5638
5639 if (!func_optimize)
5640 func_optimize = old_optimize;
5641
5642 /* Init func_options. */
5643 memset (&func_options, 0, sizeof (func_options));
5644 init_options_struct (&func_options, NULL);
5645 lang_hooks.init_options_struct (&func_options);
5646
5647 cl_optimization_restore (&func_options,
5648 TREE_OPTIMIZATION (func_optimize));
5649
5650 /* Initialize func_options to the default before its target options can
5651 be set. */
5652 cl_target_option_restore (&func_options,
5653 TREE_TARGET_OPTION (target_option_default_node));
5654
5655 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5656 &global_options_set);
5657
5658 new_optimize = build_optimization_node (&func_options);
5659
5660 if (new_target == error_mark_node)
5661 ret = false;
5662
5663 else if (fndecl && new_target)
5664 {
5665 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5666
5667 if (old_optimize != new_optimize)
5668 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5669 }
5670
5671 finalize_options_struct (&func_options);
5672
5673 return ret;
5674 }
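
/* Illustrative sketch (not part of this file): the early return for
   attribute((target("default"))) above exists because "default" only
   participates in function multi-versioning (handled in the C++ front
   end) and changes no options.  A typical use is:

     __attribute__ ((target ("default"))) int dispatch (void);
     __attribute__ ((target ("avx2")))    int dispatch (void);

   The "default" version keeps the command-line options, while the "avx2"
   version is validated through ix86_valid_target_attribute_tree.  */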
5675
5676 \f
5677 /* Hook to determine if one function can safely inline another. */
5678
5679 static bool
5680 ix86_can_inline_p (tree caller, tree callee)
5681 {
5682 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5683 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5684 if (!callee_tree)
5685 callee_tree = target_option_default_node;
5686 if (!caller_tree)
5687 caller_tree = target_option_default_node;
5688 if (callee_tree == caller_tree)
5689 return true;
5690
5691 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5692 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5693 bool ret = false;
5694
5695 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5696 function can inline an SSE2 function but an SSE2 function can't inline
5697 an SSE4 function. */
5698 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5699 != callee_opts->x_ix86_isa_flags)
5700 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
5701 != callee_opts->x_ix86_isa_flags2))
5702 ret = false;
5703
5704 /* See if we have the same non-isa options. */
5705 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5706 ret = false;
5707
5708 /* See if arch, tune, etc. are the same. */
5709 else if (caller_opts->arch != callee_opts->arch)
5710 ret = false;
5711
5712 else if (caller_opts->tune != callee_opts->tune)
5713 ret = false;
5714
5715 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
5716 /* If the callee doesn't use FP expressions, differences in
5717 ix86_fpmath can be ignored. We are called from FEs
5718 for multi-versioning call optimization, so beware of
5719 ipa_fn_summaries not available. */
5720 && (! ipa_fn_summaries
5721 || ipa_fn_summaries->get
5722 (cgraph_node::get (callee))->fp_expressions))
5723 ret = false;
5724
5725 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5726 ret = false;
5727
5728 else
5729 ret = true;
5730
5731 return ret;
5732 }
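
/* Illustrative sketch (not part of this file): the ISA-subset rule above
   allows a callee to be inlined only into a caller that enables at least
   the callee's ISA flags.  For example:

     static inline __attribute__ ((always_inline, target ("sse2")))
     double add2 (double a, double b) { return a + b; }

     __attribute__ ((target ("sse4.1")))
     double ok (double a, double b) { return add2 (a, b); }

   Inlining add2 into ok is permitted (SSE2 is a subset of SSE4.1); in the
   reverse direction ix86_can_inline_p would return false.  */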
5733
5734 \f
5735 /* Remember the last target of ix86_set_current_function. */
5736 static GTY(()) tree ix86_previous_fndecl;
5737
5738 /* Set targets globals to the default (or current #pragma GCC target
5739 if active). Invalidate ix86_previous_fndecl cache. */
5740
5741 void
5742 ix86_reset_previous_fndecl (void)
5743 {
5744 tree new_tree = target_option_current_node;
5745 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5746 if (TREE_TARGET_GLOBALS (new_tree))
5747 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5748 else if (new_tree == target_option_default_node)
5749 restore_target_globals (&default_target_globals);
5750 else
5751 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5752 ix86_previous_fndecl = NULL_TREE;
5753 }
5754
5755 /* Set the func_type field from the function FNDECL. */
5756
5757 static void
5758 ix86_set_func_type (tree fndecl)
5759 {
5760 if (cfun->machine->func_type == TYPE_UNKNOWN)
5761 {
5762 if (lookup_attribute ("interrupt",
5763 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5764 {
5765 if (ix86_function_naked (fndecl))
5766 error_at (DECL_SOURCE_LOCATION (fndecl),
5767 "interrupt and naked attributes are not compatible");
5768
5769 int nargs = 0;
5770 for (tree arg = DECL_ARGUMENTS (fndecl);
5771 arg;
5772 arg = TREE_CHAIN (arg))
5773 nargs++;
5774 cfun->machine->no_caller_saved_registers = true;
5775 cfun->machine->func_type
5776 = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
5777
5778 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
5779
5780 /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */
5781 if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
5782 sorry ("Only DWARF debug format is supported for interrupt "
5783 "service routine.");
5784 }
5785 else
5786 {
5787 cfun->machine->func_type = TYPE_NORMAL;
5788 if (lookup_attribute ("no_caller_saved_registers",
5789 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5790 cfun->machine->no_caller_saved_registers = true;
5791 }
5792 }
5793 }
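
/* Illustrative sketch (not part of this file): the argument count checked
   above selects the func_type.  Following the documented usage pattern,
   struct interrupt_frame is a user-supplied type and the error code is a
   word-sized integer:

     struct interrupt_frame;
     typedef unsigned long long uword_t __attribute__ ((mode (__word__)));

     __attribute__ ((interrupt))
     void isr (struct interrupt_frame *frame) { (void) frame; }

     __attribute__ ((interrupt))
     void fault (struct interrupt_frame *frame, uword_t error_code)
     { (void) frame; (void) error_code; }

   One argument gives TYPE_INTERRUPT, two give TYPE_EXCEPTION, and either
   form sets no_caller_saved_registers.  */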
5794
5795 /* Establish appropriate back-end context for processing the function
5796 FNDECL. The argument might be NULL to indicate processing at top
5797 level, outside of any function scope. */
5798 static void
5799 ix86_set_current_function (tree fndecl)
5800 {
5801 /* Only change the context if the function changes. This hook is called
5802 several times in the course of compiling a function, and we don't want to
5803 slow things down too much or call target_reinit when it isn't safe. */
5804 if (fndecl == ix86_previous_fndecl)
5805 {
5806 /* There may be 2 function bodies for the same function FNDECL,
5807 one is extern inline and one isn't. Call ix86_set_func_type
5808 to set the func_type field. */
5809 if (fndecl != NULL_TREE)
5810 ix86_set_func_type (fndecl);
5811 return;
5812 }
5813
5814 tree old_tree;
5815 if (ix86_previous_fndecl == NULL_TREE)
5816 old_tree = target_option_current_node;
5817 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5818 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5819 else
5820 old_tree = target_option_default_node;
5821
5822 if (fndecl == NULL_TREE)
5823 {
5824 if (old_tree != target_option_current_node)
5825 ix86_reset_previous_fndecl ();
5826 return;
5827 }
5828
5829 ix86_set_func_type (fndecl);
5830
5831 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5832 if (new_tree == NULL_TREE)
5833 new_tree = target_option_default_node;
5834
5835 if (old_tree != new_tree)
5836 {
5837 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5838 if (TREE_TARGET_GLOBALS (new_tree))
5839 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5840 else if (new_tree == target_option_default_node)
5841 restore_target_globals (&default_target_globals);
5842 else
5843 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5844 }
5845 ix86_previous_fndecl = fndecl;
5846
5847 static bool prev_no_caller_saved_registers;
5848
5849 /* 64-bit MS and SYSV ABI have different set of call used registers.
5850 Avoid expensive re-initialization of init_regs each time we switch
5851 function context. */
5852 if (TARGET_64BIT
5853 && (call_used_regs[SI_REG]
5854 == (cfun->machine->call_abi == MS_ABI)))
5855 reinit_regs ();
5856 /* Need to re-initialize init_regs if caller-saved registers are
5857 changed. */
5858 else if (prev_no_caller_saved_registers
5859 != cfun->machine->no_caller_saved_registers)
5860 reinit_regs ();
5861
5862 if (cfun->machine->func_type != TYPE_NORMAL
5863 || cfun->machine->no_caller_saved_registers)
5864 {
5865 /* Don't allow MPX, SSE, MMX nor x87 instructions since they
5866 may change processor state. */
5867 const char *isa;
5868 if (TARGET_MPX)
5869 isa = "MPX";
5870 else if (TARGET_SSE)
5871 isa = "SSE";
5872 else if (TARGET_MMX)
5873 isa = "MMX/3Dnow";
5874 else if (TARGET_80387)
5875 isa = "80387";
5876 else
5877 isa = NULL;
5878 if (isa != NULL)
5879 {
5880 if (cfun->machine->func_type != TYPE_NORMAL)
5881 sorry ("%s instructions aren't allowed in %s service routine",
5882 isa, (cfun->machine->func_type == TYPE_EXCEPTION
5883 ? "exception" : "interrupt"));
5884 else
5885 sorry ("%s instructions aren't allowed in function with "
5886 "no_caller_saved_registers attribute", isa);
5887 /* Don't issue the same error twice. */
5888 cfun->machine->func_type = TYPE_NORMAL;
5889 cfun->machine->no_caller_saved_registers = false;
5890 }
5891 }
5892
5893 prev_no_caller_saved_registers
5894 = cfun->machine->no_caller_saved_registers;
5895 }
5896
5897 \f
5898 /* Return true if this goes in large data/bss. */
5899
5900 static bool
5901 ix86_in_large_data_p (tree exp)
5902 {
5903 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5904 return false;
5905
5906 if (exp == NULL_TREE)
5907 return false;
5908
5909 /* Functions are never large data. */
5910 if (TREE_CODE (exp) == FUNCTION_DECL)
5911 return false;
5912
5913 /* Automatic variables are never large data. */
5914 if (VAR_P (exp) && !is_global_var (exp))
5915 return false;
5916
5917 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
5918 {
5919 const char *section = DECL_SECTION_NAME (exp);
5920 if (strcmp (section, ".ldata") == 0
5921 || strcmp (section, ".lbss") == 0)
5922 return true;
5923 return false;
5924 }
5925 else
5926 {
5927 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5928
5929 /* If this is an incomplete type with size 0, then we can't put it
5930 in data because it might be too big when completed. Also,
5931 int_size_in_bytes returns -1 if size can vary or is larger than
5932 an integer in which case also it is safer to assume that it goes in
5933 large data. */
5934 if (size <= 0 || size > ix86_section_threshold)
5935 return true;
5936 }
5937
5938 return false;
5939 }
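
/* Illustrative sketch (not part of this file): under -mcmodel=medium the
   test above sends objects larger than -mlarge-data-threshold (the value
   behind ix86_section_threshold) to the large data sections, and explicit
   .ldata/.lbss section names are honored as well:

     static char big_buffer[1 << 22];                  placed in .lbss
     static char table[64]
       __attribute__ ((section (".ldata"))) = { 1 };   counted as large data

   Smaller objects without an explicit section fall through to the normal
   selection in default_elf_select_section.  */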
5940
5941 /* i386-specific section flag to mark large sections. */
5942 #define SECTION_LARGE SECTION_MACH_DEP
5943
5944 /* Switch to the appropriate section for output of DECL.
5945 DECL is either a `VAR_DECL' node or a constant of some sort.
5946 RELOC indicates whether forming the initial value of DECL requires
5947 link-time relocations. */
5948
5949 ATTRIBUTE_UNUSED static section *
5950 x86_64_elf_select_section (tree decl, int reloc,
5951 unsigned HOST_WIDE_INT align)
5952 {
5953 if (ix86_in_large_data_p (decl))
5954 {
5955 const char *sname = NULL;
5956 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
5957 switch (categorize_decl_for_section (decl, reloc))
5958 {
5959 case SECCAT_DATA:
5960 sname = ".ldata";
5961 break;
5962 case SECCAT_DATA_REL:
5963 sname = ".ldata.rel";
5964 break;
5965 case SECCAT_DATA_REL_LOCAL:
5966 sname = ".ldata.rel.local";
5967 break;
5968 case SECCAT_DATA_REL_RO:
5969 sname = ".ldata.rel.ro";
5970 break;
5971 case SECCAT_DATA_REL_RO_LOCAL:
5972 sname = ".ldata.rel.ro.local";
5973 break;
5974 case SECCAT_BSS:
5975 sname = ".lbss";
5976 flags |= SECTION_BSS;
5977 break;
5978 case SECCAT_RODATA:
5979 case SECCAT_RODATA_MERGE_STR:
5980 case SECCAT_RODATA_MERGE_STR_INIT:
5981 case SECCAT_RODATA_MERGE_CONST:
5982 sname = ".lrodata";
5983 flags &= ~SECTION_WRITE;
5984 break;
5985 case SECCAT_SRODATA:
5986 case SECCAT_SDATA:
5987 case SECCAT_SBSS:
5988 gcc_unreachable ();
5989 case SECCAT_TEXT:
5990 case SECCAT_TDATA:
5991 case SECCAT_TBSS:
5992 /* We don't split these for the medium model. Place them into
5993 default sections and hope for the best. */
5994 break;
5995 }
5996 if (sname)
5997 {
5998 /* We might get called with string constants, but get_named_section
5999 doesn't like them as they are not DECLs. Also, we need to set
6000 flags in that case. */
6001 if (!DECL_P (decl))
6002 return get_section (sname, flags, NULL);
6003 return get_named_section (decl, sname, reloc);
6004 }
6005 }
6006 return default_elf_select_section (decl, reloc, align);
6007 }
6008
6009 /* Select a set of attributes for section NAME based on the properties
6010 of DECL and whether or not RELOC indicates that DECL's initializer
6011 might contain runtime relocations. */
6012
6013 static unsigned int ATTRIBUTE_UNUSED
6014 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6015 {
6016 unsigned int flags = default_section_type_flags (decl, name, reloc);
6017
6018 if (ix86_in_large_data_p (decl))
6019 flags |= SECTION_LARGE;
6020
6021 if (decl == NULL_TREE
6022 && (strcmp (name, ".ldata.rel.ro") == 0
6023 || strcmp (name, ".ldata.rel.ro.local") == 0))
6024 flags |= SECTION_RELRO;
6025
6026 if (strcmp (name, ".lbss") == 0
6027 || strncmp (name, ".lbss.", 6) == 0
6028 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
6029 flags |= SECTION_BSS;
6030
6031 return flags;
6032 }
6033
6034 /* Build up a unique section name, expressed as a
6035 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6036 RELOC indicates whether the initial value of EXP requires
6037 link-time relocations. */
6038
6039 static void ATTRIBUTE_UNUSED
6040 x86_64_elf_unique_section (tree decl, int reloc)
6041 {
6042 if (ix86_in_large_data_p (decl))
6043 {
6044 const char *prefix = NULL;
6045 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6046 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6047
6048 switch (categorize_decl_for_section (decl, reloc))
6049 {
6050 case SECCAT_DATA:
6051 case SECCAT_DATA_REL:
6052 case SECCAT_DATA_REL_LOCAL:
6053 case SECCAT_DATA_REL_RO:
6054 case SECCAT_DATA_REL_RO_LOCAL:
6055 prefix = one_only ? ".ld" : ".ldata";
6056 break;
6057 case SECCAT_BSS:
6058 prefix = one_only ? ".lb" : ".lbss";
6059 break;
6060 case SECCAT_RODATA:
6061 case SECCAT_RODATA_MERGE_STR:
6062 case SECCAT_RODATA_MERGE_STR_INIT:
6063 case SECCAT_RODATA_MERGE_CONST:
6064 prefix = one_only ? ".lr" : ".lrodata";
6065 break;
6066 case SECCAT_SRODATA:
6067 case SECCAT_SDATA:
6068 case SECCAT_SBSS:
6069 gcc_unreachable ();
6070 case SECCAT_TEXT:
6071 case SECCAT_TDATA:
6072 case SECCAT_TBSS:
6073 /* We don't split these for the medium model. Place them into
6074 default sections and hope for the best. */
6075 break;
6076 }
6077 if (prefix)
6078 {
6079 const char *name, *linkonce;
6080 char *string;
6081
6082 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6083 name = targetm.strip_name_encoding (name);
6084
6085 /* If we're using one_only, then there needs to be a .gnu.linkonce
6086 prefix to the section name. */
6087 linkonce = one_only ? ".gnu.linkonce" : "";
6088
6089 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6090
6091 set_decl_section_name (decl, string);
6092 return;
6093 }
6094 }
6095 default_unique_section (decl, reloc);
6096 }
6097
6098 #ifdef COMMON_ASM_OP
6099
6100 #ifndef LARGECOMM_SECTION_ASM_OP
6101 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
6102 #endif
6103
6104 /* This says how to output assembler code to declare an
6105 uninitialized external linkage data object.
6106
6107 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
6108 directive for large objects. */
6109 void
6110 x86_elf_aligned_decl_common (FILE *file, tree decl,
6111 const char *name, unsigned HOST_WIDE_INT size,
6112 int align)
6113 {
6114 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6115 && size > (unsigned int)ix86_section_threshold)
6116 {
6117 switch_to_section (get_named_section (decl, ".lbss", 0));
6118 fputs (LARGECOMM_SECTION_ASM_OP, file);
6119 }
6120 else
6121 fputs (COMMON_ASM_OP, file);
6122 assemble_name (file, name);
6123 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6124 size, align / BITS_PER_UNIT);
6125 }
6126 #endif
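
/* Illustrative sketch (not part of this file): for a common symbol larger
   than the section threshold under the medium code model, e.g. a
   tentative definition built with -fcommon and -mcmodel=medium such as

     char scratch[1 << 22];

   x86_elf_aligned_decl_common switches to .lbss and emits something along
   the lines of

     .largecomm	scratch,4194304,32

   whereas small objects keep the ordinary ".comm" directive.  */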
6127
6128 /* Utility function for targets to use in implementing
6129 ASM_OUTPUT_ALIGNED_BSS. */
6130
6131 void
6132 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6133 unsigned HOST_WIDE_INT size, int align)
6134 {
6135 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6136 && size > (unsigned int)ix86_section_threshold)
6137 switch_to_section (get_named_section (decl, ".lbss", 0));
6138 else
6139 switch_to_section (bss_section);
6140 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6141 #ifdef ASM_DECLARE_OBJECT_NAME
6142 last_assemble_variable_decl = decl;
6143 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6144 #else
6145 /* Standard thing is just output label for the object. */
6146 ASM_OUTPUT_LABEL (file, name);
6147 #endif /* ASM_DECLARE_OBJECT_NAME */
6148 ASM_OUTPUT_SKIP (file, size ? size : 1);
6149 }
6150 \f
6151 /* Decide whether we must probe the stack before any space allocation
6152 on this target. It's essentially TARGET_STACK_PROBE except when
6153 -fstack-check causes the stack to be already probed differently. */
6154
6155 bool
6156 ix86_target_stack_probe (void)
6157 {
6158 /* Do not probe the stack twice if static stack checking is enabled. */
6159 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6160 return false;
6161
6162 return TARGET_STACK_PROBE;
6163 }
6164 \f
6165 /* Decide whether we can make a sibling call to a function. DECL is the
6166 declaration of the function being targeted by the call and EXP is the
6167 CALL_EXPR representing the call. */
6168
6169 static bool
6170 ix86_function_ok_for_sibcall (tree decl, tree exp)
6171 {
6172 tree type, decl_or_type;
6173 rtx a, b;
6174 bool bind_global = decl && !targetm.binds_local_p (decl);
6175
6176 if (ix86_function_naked (current_function_decl))
6177 return false;
6178
6179 /* Sibling call isn't OK if there are no caller-saved registers
6180 since all registers must be preserved before return. */
6181 if (cfun->machine->no_caller_saved_registers)
6182 return false;
6183
6184 /* If we are generating position-independent code, we cannot sibcall
6185 optimize direct calls to global functions, as the PLT requires
6186 %ebx be live. (Darwin does not have a PLT.) */
6187 if (!TARGET_MACHO
6188 && !TARGET_64BIT
6189 && flag_pic
6190 && flag_plt
6191 && bind_global)
6192 return false;
6193
6194 /* If we need to align the outgoing stack, then sibcalling would
6195 unalign the stack, which may break the called function. */
6196 if (ix86_minimum_incoming_stack_boundary (true)
6197 < PREFERRED_STACK_BOUNDARY)
6198 return false;
6199
6200 if (decl)
6201 {
6202 decl_or_type = decl;
6203 type = TREE_TYPE (decl);
6204 }
6205 else
6206 {
6207 /* We're looking at the CALL_EXPR, we need the type of the function. */
6208 type = CALL_EXPR_FN (exp); /* pointer expression */
6209 type = TREE_TYPE (type); /* pointer type */
6210 type = TREE_TYPE (type); /* function type */
6211 decl_or_type = type;
6212 }
6213
6214 /* Check that the return value locations are the same. Like
6215 if we are returning floats on the 80387 register stack, we cannot
6216 make a sibcall from a function that doesn't return a float to a
6217 function that does or, conversely, from a function that does return
6218 a float to a function that doesn't; the necessary stack adjustment
6219 would not be executed. This is also the place we notice
6220 differences in the return value ABI. Note that it is ok for one
6221 of the functions to have void return type as long as the return
6222 value of the other is passed in a register. */
6223 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6224 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6225 cfun->decl, false);
6226 if (STACK_REG_P (a) || STACK_REG_P (b))
6227 {
6228 if (!rtx_equal_p (a, b))
6229 return false;
6230 }
6231 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6232 ;
6233 else if (!rtx_equal_p (a, b))
6234 return false;
6235
6236 if (TARGET_64BIT)
6237 {
6238 /* The SYSV ABI has more call-clobbered registers;
6239 disallow sibcalls from MS to SYSV. */
6240 if (cfun->machine->call_abi == MS_ABI
6241 && ix86_function_type_abi (type) == SYSV_ABI)
6242 return false;
6243 }
6244 else
6245 {
6246 /* If this call is indirect, we'll need to be able to use a
6247 call-clobbered register for the address of the target function.
6248 Make sure that all such registers are not used for passing
6249 parameters. Note that DLLIMPORT functions and call to global
6250 function via GOT slot are indirect. */
6251 if (!decl
6252 || (bind_global && flag_pic && !flag_plt)
6253 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6254 {
6255 /* Check if regparm >= 3 since arg_reg_available is set to
6256 false if regparm == 0. If regparm is 1 or 2, there is
6257 always a call-clobbered register available.
6258
6259 ??? The symbol indirect call doesn't need a call-clobbered
6260 register. But we don't know if this is a symbol indirect
6261 call or not here. */
6262 if (ix86_function_regparm (type, NULL) >= 3
6263 && !cfun->machine->arg_reg_available)
6264 return false;
6265 }
6266 }
6267
6268 /* Otherwise okay. That also includes certain types of indirect calls. */
6269 return true;
6270 }
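
/* Illustrative sketch (not part of this file): when every check above
   passes, a call in tail position may be emitted as a sibling call, i.e.
   a plain jump that reuses the caller's stack frame:

     extern int helper (int);

     int
     wrapper (int x)
     {
       return helper (x + 1);      with -O2 this can become "jmp helper"
     }

   A naked or no_caller_saved_registers caller, or a 32-bit PIC call to a
   global function through the PLT, disables the optimization per the
   tests above.  */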
6271
6272 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6273 and "sseregparm" calling convention attributes;
6274 arguments as in struct attribute_spec.handler. */
6275
6276 static tree
6277 ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
6278 bool *no_add_attrs)
6279 {
6280 if (TREE_CODE (*node) != FUNCTION_TYPE
6281 && TREE_CODE (*node) != METHOD_TYPE
6282 && TREE_CODE (*node) != FIELD_DECL
6283 && TREE_CODE (*node) != TYPE_DECL)
6284 {
6285 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6286 name);
6287 *no_add_attrs = true;
6288 return NULL_TREE;
6289 }
6290
6291 /* Can combine regparm with all attributes but fastcall and thiscall. */
6292 if (is_attribute_p ("regparm", name))
6293 {
6294 tree cst;
6295
6296 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6297 {
6298 error ("fastcall and regparm attributes are not compatible");
6299 }
6300
6301 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6302 {
6303 error ("regparam and thiscall attributes are not compatible");
6304 }
6305
6306 cst = TREE_VALUE (args);
6307 if (TREE_CODE (cst) != INTEGER_CST)
6308 {
6309 warning (OPT_Wattributes,
6310 "%qE attribute requires an integer constant argument",
6311 name);
6312 *no_add_attrs = true;
6313 }
6314 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6315 {
6316 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6317 name, REGPARM_MAX);
6318 *no_add_attrs = true;
6319 }
6320
6321 return NULL_TREE;
6322 }
6323
6324 if (TARGET_64BIT)
6325 {
6326 /* Do not warn when emulating the MS ABI. */
6327 if ((TREE_CODE (*node) != FUNCTION_TYPE
6328 && TREE_CODE (*node) != METHOD_TYPE)
6329 || ix86_function_type_abi (*node) != MS_ABI)
6330 warning (OPT_Wattributes, "%qE attribute ignored",
6331 name);
6332 *no_add_attrs = true;
6333 return NULL_TREE;
6334 }
6335
6336 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6337 if (is_attribute_p ("fastcall", name))
6338 {
6339 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6340 {
6341 error ("fastcall and cdecl attributes are not compatible");
6342 }
6343 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6344 {
6345 error ("fastcall and stdcall attributes are not compatible");
6346 }
6347 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6348 {
6349 error ("fastcall and regparm attributes are not compatible");
6350 }
6351 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6352 {
6353 error ("fastcall and thiscall attributes are not compatible");
6354 }
6355 }
6356
6357 /* Can combine stdcall with fastcall (redundant), regparm and
6358 sseregparm. */
6359 else if (is_attribute_p ("stdcall", name))
6360 {
6361 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6362 {
6363 error ("stdcall and cdecl attributes are not compatible");
6364 }
6365 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6366 {
6367 error ("stdcall and fastcall attributes are not compatible");
6368 }
6369 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6370 {
6371 error ("stdcall and thiscall attributes are not compatible");
6372 }
6373 }
6374
6375 /* Can combine cdecl with regparm and sseregparm. */
6376 else if (is_attribute_p ("cdecl", name))
6377 {
6378 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6379 {
6380 error ("stdcall and cdecl attributes are not compatible");
6381 }
6382 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6383 {
6384 error ("fastcall and cdecl attributes are not compatible");
6385 }
6386 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6387 {
6388 error ("cdecl and thiscall attributes are not compatible");
6389 }
6390 }
6391 else if (is_attribute_p ("thiscall", name))
6392 {
6393 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6394 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6395 name);
6396 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6397 {
6398 error ("stdcall and thiscall attributes are not compatible");
6399 }
6400 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6401 {
6402 error ("fastcall and thiscall attributes are not compatible");
6403 }
6404 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6405 {
6406 error ("cdecl and thiscall attributes are not compatible");
6407 }
6408 }
6409
6410 /* Can combine sseregparm with all attributes. */
6411
6412 return NULL_TREE;
6413 }
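
/* Illustrative sketch (not part of this file): the combination rules
   enforced above only matter for 32-bit code:

     int __attribute__ ((regparm (3)))         f1 (int, int, int);   ok
     int __attribute__ ((stdcall, sseregparm)) f2 (float);           ok
     int __attribute__ ((fastcall, stdcall))   f3 (int);             error
     int __attribute__ ((regparm (8)))         f4 (int);             warning:
                                                   larger than REGPARM_MAX

   In 64-bit mode the attributes are ignored with a warning, unless the
   MS ABI is being emulated.  */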
6414
6415 /* The transactional memory builtins are implicitly regparm or fastcall
6416 depending on the ABI. Override the generic do-nothing attribute that
6417 these builtins were declared with, and replace it with one of the two
6418 attributes that we expect elsewhere. */
6419
6420 static tree
6421 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6422 int flags, bool *no_add_attrs)
6423 {
6424 tree alt;
6425
6426 /* In no case do we want to add the placeholder attribute. */
6427 *no_add_attrs = true;
6428
6429 /* The 64-bit ABI is unchanged for transactional memory. */
6430 if (TARGET_64BIT)
6431 return NULL_TREE;
6432
6433 /* ??? Is there a better way to validate 32-bit windows? We have
6434 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
6435 if (CHECK_STACK_LIMIT > 0)
6436 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6437 else
6438 {
6439 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6440 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6441 }
6442 decl_attributes (node, alt, flags);
6443
6444 return NULL_TREE;
6445 }
6446
6447 /* This function determines from TYPE the calling-convention. */
6448
6449 unsigned int
6450 ix86_get_callcvt (const_tree type)
6451 {
6452 unsigned int ret = 0;
6453 bool is_stdarg;
6454 tree attrs;
6455
6456 if (TARGET_64BIT)
6457 return IX86_CALLCVT_CDECL;
6458
6459 attrs = TYPE_ATTRIBUTES (type);
6460 if (attrs != NULL_TREE)
6461 {
6462 if (lookup_attribute ("cdecl", attrs))
6463 ret |= IX86_CALLCVT_CDECL;
6464 else if (lookup_attribute ("stdcall", attrs))
6465 ret |= IX86_CALLCVT_STDCALL;
6466 else if (lookup_attribute ("fastcall", attrs))
6467 ret |= IX86_CALLCVT_FASTCALL;
6468 else if (lookup_attribute ("thiscall", attrs))
6469 ret |= IX86_CALLCVT_THISCALL;
6470
6471 /* Regparm isn't allowed for thiscall and fastcall. */
6472 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6473 {
6474 if (lookup_attribute ("regparm", attrs))
6475 ret |= IX86_CALLCVT_REGPARM;
6476 if (lookup_attribute ("sseregparm", attrs))
6477 ret |= IX86_CALLCVT_SSEREGPARM;
6478 }
6479
6480 if (IX86_BASE_CALLCVT(ret) != 0)
6481 return ret;
6482 }
6483
6484 is_stdarg = stdarg_p (type);
6485 if (TARGET_RTD && !is_stdarg)
6486 return IX86_CALLCVT_STDCALL | ret;
6487
6488 if (ret != 0
6489 || is_stdarg
6490 || TREE_CODE (type) != METHOD_TYPE
6491 || ix86_function_type_abi (type) != MS_ABI)
6492 return IX86_CALLCVT_CDECL | ret;
6493
6494 return IX86_CALLCVT_THISCALL;
6495 }
6496
6497 /* Return 0 if the attributes for two types are incompatible, 1 if they
6498 are compatible, and 2 if they are nearly compatible (which causes a
6499 warning to be generated). */
6500
6501 static int
6502 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6503 {
6504 unsigned int ccvt1, ccvt2;
6505
6506 if (TREE_CODE (type1) != FUNCTION_TYPE
6507 && TREE_CODE (type1) != METHOD_TYPE)
6508 return 1;
6509
6510 ccvt1 = ix86_get_callcvt (type1);
6511 ccvt2 = ix86_get_callcvt (type2);
6512 if (ccvt1 != ccvt2)
6513 return 0;
6514 if (ix86_function_regparm (type1, NULL)
6515 != ix86_function_regparm (type2, NULL))
6516 return 0;
6517
6518 return 1;
6519 }
6520 \f
6521 /* Return the regparm value for a function with the indicated TYPE and DECL.
6522 DECL may be NULL when calling function indirectly
6523 or considering a libcall. */
6524
6525 static int
6526 ix86_function_regparm (const_tree type, const_tree decl)
6527 {
6528 tree attr;
6529 int regparm;
6530 unsigned int ccvt;
6531
6532 if (TARGET_64BIT)
6533 return (ix86_function_type_abi (type) == SYSV_ABI
6534 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6535 ccvt = ix86_get_callcvt (type);
6536 regparm = ix86_regparm;
6537
6538 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6539 {
6540 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6541 if (attr)
6542 {
6543 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6544 return regparm;
6545 }
6546 }
6547 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6548 return 2;
6549 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6550 return 1;
6551
6552 /* Use register calling convention for local functions when possible. */
6553 if (decl
6554 && TREE_CODE (decl) == FUNCTION_DECL)
6555 {
6556 cgraph_node *target = cgraph_node::get (decl);
6557 if (target)
6558 target = target->function_symbol ();
6559
6560 /* Caller and callee must agree on the calling convention, so
6561 checking just the current function's optimize setting would mean
6562 that with __attribute__((optimize (...))) the caller could use the
6563 regparm convention and the callee not, or vice versa. Instead look
6564 at whether the callee itself is optimized. */
6565 if (target && opt_for_fn (target->decl, optimize)
6566 && !(profile_flag && !flag_fentry))
6567 {
6568 cgraph_local_info *i = &target->local;
6569 if (i && i->local && i->can_change_signature)
6570 {
6571 int local_regparm, globals = 0, regno;
6572
6573 /* Make sure no regparm register is taken by a
6574 fixed register variable. */
6575 for (local_regparm = 0; local_regparm < REGPARM_MAX;
6576 local_regparm++)
6577 if (fixed_regs[local_regparm])
6578 break;
6579
6580 /* We don't want to use regparm(3) for nested functions as
6581 these use a static chain pointer in the third argument. */
6582 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
6583 local_regparm = 2;
6584
6585 /* Save a register for the split stack. */
6586 if (flag_split_stack)
6587 {
6588 if (local_regparm == 3)
6589 local_regparm = 2;
6590 else if (local_regparm == 2
6591 && DECL_STATIC_CHAIN (target->decl))
6592 local_regparm = 1;
6593 }
6594
6595 /* Each fixed register usage increases register pressure,
6596 so fewer registers should be used for argument passing.
6597 This behavior can be overridden by an explicit
6598 regparm value. */
6599 for (regno = AX_REG; regno <= DI_REG; regno++)
6600 if (fixed_regs[regno])
6601 globals++;
6602
6603 local_regparm
6604 = globals < local_regparm ? local_regparm - globals : 0;
6605
6606 if (local_regparm > regparm)
6607 regparm = local_regparm;
6608 }
6609 }
6610 }
6611
6612 return regparm;
6613 }
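
/* Illustrative sketch (not part of this file): an explicit regparm value
   overrides the heuristics above; in 32-bit code

     __attribute__ ((regparm (3)))
     int sum3 (int a, int b, int c);

   passes a, b and c in %eax, %edx and %ecx instead of on the stack.  For
   local, optimized functions the loop above can choose a similar
   convention automatically, reduced by fixed registers, -fsplit-stack,
   or a static chain argument.  */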
6614
6615 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
6616 DFmode (2) arguments in SSE registers for a function with the
6617 indicated TYPE and DECL. DECL may be NULL when calling function
6618 indirectly or considering a libcall. Return -1 if any FP parameter
6619 should be rejected by error. This is used in situations where we imply the
6620 SSE calling convention but the function is called from another function
6621 with SSE disabled. Otherwise return 0. */
6622
6623 static int
6624 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6625 {
6626 gcc_assert (!TARGET_64BIT);
6627
6628 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6629 by the sseregparm attribute. */
6630 if (TARGET_SSEREGPARM
6631 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6632 {
6633 if (!TARGET_SSE)
6634 {
6635 if (warn)
6636 {
6637 if (decl)
6638 error ("calling %qD with attribute sseregparm without "
6639 "SSE/SSE2 enabled", decl);
6640 else
6641 error ("calling %qT with attribute sseregparm without "
6642 "SSE/SSE2 enabled", type);
6643 }
6644 return 0;
6645 }
6646
6647 return 2;
6648 }
6649
6650 if (!decl)
6651 return 0;
6652
6653 cgraph_node *target = cgraph_node::get (decl);
6654 if (target)
6655 target = target->function_symbol ();
6656
6657 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6658 (and DFmode for SSE2) arguments in SSE registers. */
6659 if (target
6660 /* TARGET_SSE_MATH */
6661 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6662 && opt_for_fn (target->decl, optimize)
6663 && !(profile_flag && !flag_fentry))
6664 {
6665 cgraph_local_info *i = &target->local;
6666 if (i && i->local && i->can_change_signature)
6667 {
6668 /* Refuse to produce wrong code when local function with SSE enabled
6669 is called from SSE disabled function.
6670 FIXME: We need a way to detect these cases cross-ltrans partition
6671 and avoid using SSE calling conventions on local functions called
6672 from function with SSE disabled. For now at least delay the
6673 warning until we know we are going to produce wrong code.
6674 See PR66047 */
6675 if (!TARGET_SSE && warn)
6676 return -1;
6677 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6678 ->x_ix86_isa_flags) ? 2 : 1;
6679 }
6680 }
6681
6682 return 0;
6683 }
6684
6685 /* Return true if EAX is live at the start of the function. Used by
6686 ix86_expand_prologue to determine if we need special help before
6687 calling allocate_stack_worker. */
6688
6689 static bool
6690 ix86_eax_live_at_start_p (void)
6691 {
6692 /* Cheat. Don't bother working forward from ix86_function_regparm
6693 to the function type to whether an actual argument is located in
6694 eax. Instead just look at cfg info, which is still close enough
6695 to correct at this point. This gives false positives for broken
6696 functions that might use uninitialized data that happens to be
6697 allocated in eax, but who cares? */
6698 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6699 }
6700
6701 static bool
6702 ix86_keep_aggregate_return_pointer (tree fntype)
6703 {
6704 tree attr;
6705
6706 if (!TARGET_64BIT)
6707 {
6708 attr = lookup_attribute ("callee_pop_aggregate_return",
6709 TYPE_ATTRIBUTES (fntype));
6710 if (attr)
6711 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6712
6713 /* For 32-bit MS-ABI the default is to keep aggregate
6714 return pointer. */
6715 if (ix86_function_type_abi (fntype) == MS_ABI)
6716 return true;
6717 }
6718 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6719 }
6720
6721 /* Value is the number of bytes of arguments automatically
6722 popped when returning from a subroutine call.
6723 FUNDECL is the declaration node of the function (as a tree),
6724 FUNTYPE is the data type of the function (as a tree),
6725 or for a library call it is an identifier node for the subroutine name.
6726 SIZE is the number of bytes of arguments passed on the stack.
6727
6728 On the 80386, the RTD insn may be used to pop them if the number
6729 of args is fixed, but if the number is variable then the caller
6730 must pop them all. RTD can't be used for library calls now
6731 because the library is compiled with the Unix compiler.
6732 Use of RTD is a selectable option, since it is incompatible with
6733 standard Unix calling sequences. If the option is not selected,
6734 the caller must always pop the args.
6735
6736 The attribute stdcall is equivalent to RTD on a per module basis. */
6737
6738 static int
6739 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6740 {
6741 unsigned int ccvt;
6742
6743 /* None of the 64-bit ABIs pop arguments. */
6744 if (TARGET_64BIT)
6745 return 0;
6746
6747 ccvt = ix86_get_callcvt (funtype);
6748
6749 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6750 | IX86_CALLCVT_THISCALL)) != 0
6751 && ! stdarg_p (funtype))
6752 return size;
6753
6754 /* Lose any fake structure return argument if it is passed on the stack. */
6755 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6756 && !ix86_keep_aggregate_return_pointer (funtype))
6757 {
6758 int nregs = ix86_function_regparm (funtype, fundecl);
6759 if (nregs == 0)
6760 return GET_MODE_SIZE (Pmode);
6761 }
6762
6763 return 0;
6764 }
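
/* Illustrative sketch (not part of this file): the effect of the rules
   above in 32-bit code:

     struct big { int v[4]; };

     __attribute__ ((stdcall)) int f (int a, int b);   callee pops 8 bytes,
                                                       typically "ret $8"
     int g (int a, ...);                               varargs: caller pops
     struct big h (void);                              the hidden aggregate
                                                       return pointer may be
                                                       popped, per the
                                                       regparm check above

   In 64-bit mode the hook always returns 0: no ABI pops arguments.  */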
6765
6766 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6767
6768 static bool
6769 ix86_legitimate_combined_insn (rtx_insn *insn)
6770 {
6771 int i;
6772
6773 /* Check operand constraints in case hard registers were propagated
6774 into insn pattern. This check prevents combine pass from
6775 generating insn patterns with invalid hard register operands.
6776 These invalid insns can eventually confuse reload to error out
6777 with a spill failure. See also PRs 46829 and 46843. */
6778
6779 gcc_assert (INSN_CODE (insn) >= 0);
6780
6781 extract_insn (insn);
6782 preprocess_constraints (insn);
6783
6784 int n_operands = recog_data.n_operands;
6785 int n_alternatives = recog_data.n_alternatives;
6786 for (i = 0; i < n_operands; i++)
6787 {
6788 rtx op = recog_data.operand[i];
6789 machine_mode mode = GET_MODE (op);
6790 const operand_alternative *op_alt;
6791 int offset = 0;
6792 bool win;
6793 int j;
6794
6795 /* A unary operator may be accepted by the predicate, but it
6796 is irrelevant for matching constraints. */
6797 if (UNARY_P (op))
6798 op = XEXP (op, 0);
6799
6800 if (SUBREG_P (op))
6801 {
6802 if (REG_P (SUBREG_REG (op))
6803 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6804 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6805 GET_MODE (SUBREG_REG (op)),
6806 SUBREG_BYTE (op),
6807 GET_MODE (op));
6808 op = SUBREG_REG (op);
6809 }
6810
6811 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6812 continue;
6813
6814 op_alt = recog_op_alt;
6815
6816 /* Operand has no constraints, anything is OK. */
6817 win = !n_alternatives;
6818
6819 alternative_mask preferred = get_preferred_alternatives (insn);
6820 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6821 {
6822 if (!TEST_BIT (preferred, j))
6823 continue;
6824 if (op_alt[i].anything_ok
6825 || (op_alt[i].matches != -1
6826 && operands_match_p
6827 (recog_data.operand[i],
6828 recog_data.operand[op_alt[i].matches]))
6829 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6830 {
6831 win = true;
6832 break;
6833 }
6834 }
6835
6836 if (!win)
6837 return false;
6838 }
6839
6840 return true;
6841 }
6842 \f
6843 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6844
6845 static unsigned HOST_WIDE_INT
6846 ix86_asan_shadow_offset (void)
6847 {
6848 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6849 : HOST_WIDE_INT_C (0x7fff8000))
6850 : (HOST_WIDE_INT_1 << 29);
6851 }
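
/* Illustrative sketch (not part of this file): AddressSanitizer maps every
   eight application bytes onto one shadow byte, so the offset returned
   above is combined with a 3-bit shift:

     static inline unsigned long long
     asan_shadow_address (unsigned long long addr)
     {
       return (addr >> 3) + 0x7fff8000ULL;   64-bit LP64, non-Mach-O value
     }

   In 32-bit and x32 modes the offset is 1 << 29, and on 64-bit Mach-O it
   is 1 << 44, matching the expression in ix86_asan_shadow_offset.  */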
6852 \f
6853 /* Argument support functions. */
6854
6855 /* Return true when register may be used to pass function parameters. */
6856 bool
6857 ix86_function_arg_regno_p (int regno)
6858 {
6859 int i;
6860 enum calling_abi call_abi;
6861 const int *parm_regs;
6862
6863 if (TARGET_MPX && BND_REGNO_P (regno))
6864 return true;
6865
6866 if (!TARGET_64BIT)
6867 {
6868 if (TARGET_MACHO)
6869 return (regno < REGPARM_MAX
6870 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6871 else
6872 return (regno < REGPARM_MAX
6873 || (TARGET_MMX && MMX_REGNO_P (regno)
6874 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6875 || (TARGET_SSE && SSE_REGNO_P (regno)
6876 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6877 }
6878
6879 if (TARGET_SSE && SSE_REGNO_P (regno)
6880 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6881 return true;
6882
6883 /* TODO: The function should depend on current function ABI but
6884 builtins.c would need updating then. Therefore we use the
6885 default ABI. */
6886 call_abi = ix86_cfun_abi ();
6887
6888 /* RAX is used as hidden argument to va_arg functions. */
6889 if (call_abi == SYSV_ABI && regno == AX_REG)
6890 return true;
6891
6892 if (call_abi == MS_ABI)
6893 parm_regs = x86_64_ms_abi_int_parameter_registers;
6894 else
6895 parm_regs = x86_64_int_parameter_registers;
6896
6897 for (i = 0; i < (call_abi == MS_ABI
6898 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6899 if (regno == parm_regs[i])
6900 return true;
6901 return false;
6902 }
6903
6904 /* Return true if we do not know how to pass TYPE solely in registers. */
6905
6906 static bool
6907 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6908 {
6909 if (must_pass_in_stack_var_size_or_pad (mode, type))
6910 return true;
6911
6912 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6913 The layout_type routine is crafty and tries to trick us into passing
6914 currently unsupported vector types on the stack by using TImode. */
6915 return (!TARGET_64BIT && mode == TImode
6916 && type && TREE_CODE (type) != VECTOR_TYPE);
6917 }
6918
6919 /* Return the size, in bytes, of the area reserved for arguments passed
6920 in registers for the function represented by FNDECL, depending on the
6921 ABI used. */
6922 int
6923 ix86_reg_parm_stack_space (const_tree fndecl)
6924 {
6925 enum calling_abi call_abi = SYSV_ABI;
6926 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6927 call_abi = ix86_function_abi (fndecl);
6928 else
6929 call_abi = ix86_function_type_abi (fndecl);
6930 if (TARGET_64BIT && call_abi == MS_ABI)
6931 return 32;
6932 return 0;
6933 }
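
/* Worked example for the 32-byte value above (an informal note): the
   Windows x64 convention makes the caller reserve a 32-byte "home" area
   on the stack for the four register parameters (RCX, RDX, R8, R9) even
   when the callee takes fewer arguments, so a call such as f (1) still
   allocates the full 32 bytes, which the callee may use to spill RCX.  */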
6934
6935 /* We add this as a workaround in order to use libc_has_function
6936 hook in i386.md. */
6937 bool
6938 ix86_libc_has_function (enum function_class fn_class)
6939 {
6940 return targetm.libc_has_function (fn_class);
6941 }
6942
6943 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
6944 specifying the call ABI used. */
6945 enum calling_abi
6946 ix86_function_type_abi (const_tree fntype)
6947 {
6948 enum calling_abi abi = ix86_abi;
6949
6950 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
6951 return abi;
6952
6953 if (abi == SYSV_ABI
6954 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6955 {
6956 static int warned;
6957 if (TARGET_X32 && !warned)
6958 {
6959 error ("X32 does not support ms_abi attribute");
6960 warned = 1;
6961 }
6962
6963 abi = MS_ABI;
6964 }
6965 else if (abi == MS_ABI
6966 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6967 abi = SYSV_ABI;
6968
6969 return abi;
6970 }
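
/* Illustrative example of the attribute checks above: on a SysV x86-64
   target, declaring

     int f (int, int) __attribute__ ((ms_abi));

   makes calls to f follow the MS ABI (RCX/RDX/R8/R9 plus the 32-byte
   shadow area), while __attribute__ ((sysv_abi)) switches a function back
   to the SysV convention on an MS ABI target.  The attribute is rejected
   for x32, as handled above.  */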
6971
6972 static enum calling_abi
6973 ix86_function_abi (const_tree fndecl)
6974 {
6975 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
6976 }
6977
6978 /* Return SYSV_ABI or MS_ABI, depending on CFUN,
6979 specifying the call ABI used. */
6980 enum calling_abi
6981 ix86_cfun_abi (void)
6982 {
6983 return cfun ? cfun->machine->call_abi : ix86_abi;
6984 }
6985
6986 static bool
6987 ix86_function_ms_hook_prologue (const_tree fn)
6988 {
6989 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6990 {
6991 if (decl_function_context (fn) != NULL_TREE)
6992 error_at (DECL_SOURCE_LOCATION (fn),
6993 "ms_hook_prologue is not compatible with nested function");
6994 else
6995 return true;
6996 }
6997 return false;
6998 }
6999
7000 static bool
7001 ix86_function_naked (const_tree fn)
7002 {
7003 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
7004 return true;
7005
7006 return false;
7007 }
7008
7009 /* Write the extra assembler code needed to declare a function properly. */
7010
7011 void
7012 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7013 tree decl)
7014 {
7015 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
7016
7017 if (is_ms_hook)
7018 {
7019 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7020 unsigned int filler_cc = 0xcccccccc;
7021
7022 for (i = 0; i < filler_count; i += 4)
7023 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7024 }
7025
7026 #ifdef SUBTARGET_ASM_UNWIND_INIT
7027 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7028 #endif
7029
7030 ASM_OUTPUT_LABEL (asm_out_file, fname);
7031
7032 /* Output magic byte marker, if hot-patch attribute is set. */
7033 if (is_ms_hook)
7034 {
7035 if (TARGET_64BIT)
7036 {
7037 /* leaq [%rsp + 0], %rsp */
7038 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
7039 asm_out_file);
7040 }
7041 else
7042 {
7043 /* movl.s %edi, %edi
7044 push %ebp
7045 movl.s %esp, %ebp */
7046 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
7047 }
7048 }
7049 }
7050
7051 /* Implementation of the call ABI switching target hook. The call
7052 register sets specific to FNDECL are selected. See also
7053 ix86_conditional_register_usage for more details. */
7054 void
7055 ix86_call_abi_override (const_tree fndecl)
7056 {
7057 cfun->machine->call_abi = ix86_function_abi (fndecl);
7058 }
7059
7060 /* Return true if a pseudo register should be created and used to hold
7061 the GOT address for PIC code. */
7062 bool
7063 ix86_use_pseudo_pic_reg (void)
7064 {
7065 if ((TARGET_64BIT
7066 && (ix86_cmodel == CM_SMALL_PIC
7067 || TARGET_PECOFF))
7068 || !flag_pic)
7069 return false;
7070 return true;
7071 }
7072
7073 /* Initialize large model PIC register. */
7074
7075 static void
7076 ix86_init_large_pic_reg (unsigned int tmp_regno)
7077 {
7078 rtx_code_label *label;
7079 rtx tmp_reg;
7080
7081 gcc_assert (Pmode == DImode);
7082 label = gen_label_rtx ();
7083 emit_label (label);
7084 LABEL_PRESERVE_P (label) = 1;
7085 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7086 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7087 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7088 label));
7089 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7090 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7091 pic_offset_table_rtx, tmp_reg));
7092 const char *name = LABEL_NAME (label);
7093 PUT_CODE (label, NOTE);
7094 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
7095 NOTE_DELETED_LABEL_NAME (label) = name;
7096 }
7097
7098 /* Create and initialize PIC register if required. */
7099 static void
7100 ix86_init_pic_reg (void)
7101 {
7102 edge entry_edge;
7103 rtx_insn *seq;
7104
7105 if (!ix86_use_pseudo_pic_reg ())
7106 return;
7107
7108 start_sequence ();
7109
7110 if (TARGET_64BIT)
7111 {
7112 if (ix86_cmodel == CM_LARGE_PIC)
7113 ix86_init_large_pic_reg (R11_REG);
7114 else
7115 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7116 }
7117 else
7118 {
7119 /* If there is a future mcount call in the function, it is more profitable
7120 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7121 rtx reg = crtl->profile
7122 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7123 : pic_offset_table_rtx;
7124 rtx_insn *insn = emit_insn (gen_set_got (reg));
7125 RTX_FRAME_RELATED_P (insn) = 1;
7126 if (crtl->profile)
7127 emit_move_insn (pic_offset_table_rtx, reg);
7128 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7129 }
7130
7131 seq = get_insns ();
7132 end_sequence ();
7133
7134 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7135 insert_insn_on_edge (seq, entry_edge);
7136 commit_one_edge_insertion (entry_edge);
7137 }
7138
7139 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7140 for a call to a function whose data type is FNTYPE.
7141 For a library call, FNTYPE is 0. */
7142
7143 void
7144 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7145 tree fntype, /* tree ptr for function decl */
7146 rtx libname, /* SYMBOL_REF of library name or 0 */
7147 tree fndecl,
7148 int caller)
7149 {
7150 struct cgraph_local_info *i = NULL;
7151 struct cgraph_node *target = NULL;
7152
7153 memset (cum, 0, sizeof (*cum));
7154
7155 if (fndecl)
7156 {
7157 target = cgraph_node::get (fndecl);
7158 if (target)
7159 {
7160 target = target->function_symbol ();
7161 i = cgraph_node::local_info (target->decl);
7162 cum->call_abi = ix86_function_abi (target->decl);
7163 }
7164 else
7165 cum->call_abi = ix86_function_abi (fndecl);
7166 }
7167 else
7168 cum->call_abi = ix86_function_type_abi (fntype);
7169
7170 cum->caller = caller;
7171
7172 /* Set up the number of registers to use for passing arguments. */
7173 cum->nregs = ix86_regparm;
7174 if (TARGET_64BIT)
7175 {
7176 cum->nregs = (cum->call_abi == SYSV_ABI
7177 ? X86_64_REGPARM_MAX
7178 : X86_64_MS_REGPARM_MAX);
7179 }
7180 if (TARGET_SSE)
7181 {
7182 cum->sse_nregs = SSE_REGPARM_MAX;
7183 if (TARGET_64BIT)
7184 {
7185 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7186 ? X86_64_SSE_REGPARM_MAX
7187 : X86_64_MS_SSE_REGPARM_MAX);
7188 }
7189 }
7190 if (TARGET_MMX)
7191 cum->mmx_nregs = MMX_REGPARM_MAX;
7192 cum->warn_avx512f = true;
7193 cum->warn_avx = true;
7194 cum->warn_sse = true;
7195 cum->warn_mmx = true;
7196
7197 /* Because the type might mismatch between caller and callee, we need to
7198 use the actual type of the function for local calls.
7199 FIXME: cgraph_analyze can be told to actually record if function uses
7200 va_start so for local functions maybe_vaarg can be made aggressive
7201 helping K&R code.
7202 FIXME: once the type system is fixed, we won't need this code anymore. */
7203 if (i && i->local && i->can_change_signature)
7204 fntype = TREE_TYPE (target->decl);
7205 cum->stdarg = stdarg_p (fntype);
7206 cum->maybe_vaarg = (fntype
7207 ? (!prototype_p (fntype) || stdarg_p (fntype))
7208 : !libname);
7209
7210 cum->bnd_regno = FIRST_BND_REG;
7211 cum->bnds_in_bt = 0;
7212 cum->force_bnd_pass = 0;
7213 cum->decl = fndecl;
7214
7215 cum->warn_empty = !warn_abi || cum->stdarg;
7216 if (!cum->warn_empty && fntype)
7217 {
7218 function_args_iterator iter;
7219 tree argtype;
7220 bool seen_empty_type = false;
7221 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
7222 {
7223 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
7224 break;
7225 if (TYPE_EMPTY_P (argtype))
7226 seen_empty_type = true;
7227 else if (seen_empty_type)
7228 {
7229 cum->warn_empty = true;
7230 break;
7231 }
7232 }
7233 }
7234
7235 if (!TARGET_64BIT)
7236 {
7237 /* If there are variable arguments, then we won't pass anything
7238 in registers in 32-bit mode. */
7239 if (stdarg_p (fntype))
7240 {
7241 cum->nregs = 0;
7242 /* Since in 32-bit, variable arguments are always passed on
7243 the stack, there is a scratch register available for an
7244 indirect sibcall. */
7245 cfun->machine->arg_reg_available = true;
7246 cum->sse_nregs = 0;
7247 cum->mmx_nregs = 0;
7248 cum->warn_avx512f = false;
7249 cum->warn_avx = false;
7250 cum->warn_sse = false;
7251 cum->warn_mmx = false;
7252 return;
7253 }
7254
7255 /* Use ecx and edx registers if function has fastcall attribute,
7256 else look for regparm information. */
7257 if (fntype)
7258 {
7259 unsigned int ccvt = ix86_get_callcvt (fntype);
7260 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7261 {
7262 cum->nregs = 1;
7263 cum->fastcall = 1; /* Same first register as in fastcall. */
7264 }
7265 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7266 {
7267 cum->nregs = 2;
7268 cum->fastcall = 1;
7269 }
7270 else
7271 cum->nregs = ix86_function_regparm (fntype, fndecl);
7272 }
7273
7274 /* Set up the number of SSE registers used for passing SFmode
7275 and DFmode arguments. Warn for mismatching ABI. */
7276 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7277 }
7278
7279 cfun->machine->arg_reg_available = (cum->nregs > 0);
7280 }
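
/* Rough examples of the 32-bit register conventions configured above:
   with __attribute__ ((fastcall)) the first two DWORD-or-smaller
   arguments travel in ECX and EDX (cum->nregs == 2, cum->fastcall set),
   with __attribute__ ((thiscall)) only the first one is in ECX, and with
   __attribute__ ((regparm (3))) up to three integer arguments are passed
   in EAX, EDX and ECX.  Remaining arguments go on the stack.  */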
7281
7282 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7283 But in the case of vector types, it is some vector mode.
7284
7285 When we have only some of our vector isa extensions enabled, then there
7286 are some modes for which vector_mode_supported_p is false. For these
7287 modes, the generic vector support in gcc will choose some non-vector mode
7288 in order to implement the type. By computing the natural mode, we'll
7289 select the proper ABI location for the operand and not depend on whatever
7290 the middle-end decides to do with these vector types.
7291
7292 The middle-end can't deal with vector types > 16 bytes. In this
7293 case, we return the original mode and warn ABI change if CUM isn't
7294 NULL.
7295
7296 If IN_RETURN is true, warn ABI change if the vector mode isn't
7297 available for function return value. */
7298
7299 static machine_mode
7300 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7301 bool in_return)
7302 {
7303 machine_mode mode = TYPE_MODE (type);
7304
7305 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7306 {
7307 HOST_WIDE_INT size = int_size_in_bytes (type);
7308 if ((size == 8 || size == 16 || size == 32 || size == 64)
7309 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7310 && TYPE_VECTOR_SUBPARTS (type) > 1)
7311 {
7312 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7313
7314 /* There are no XFmode vector modes. */
7315 if (innermode == XFmode)
7316 return mode;
7317
7318 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7319 mode = MIN_MODE_VECTOR_FLOAT;
7320 else
7321 mode = MIN_MODE_VECTOR_INT;
7322
7323 /* Get the mode which has this inner mode and number of units. */
7324 FOR_EACH_MODE_FROM (mode, mode)
7325 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7326 && GET_MODE_INNER (mode) == innermode)
7327 {
7328 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7329 {
7330 static bool warnedavx512f;
7331 static bool warnedavx512f_ret;
7332
7333 if (cum && cum->warn_avx512f && !warnedavx512f)
7334 {
7335 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7336 "without AVX512F enabled changes the ABI"))
7337 warnedavx512f = true;
7338 }
7339 else if (in_return && !warnedavx512f_ret)
7340 {
7341 if (warning (OPT_Wpsabi, "AVX512F vector return "
7342 "without AVX512F enabled changes the ABI"))
7343 warnedavx512f_ret = true;
7344 }
7345
7346 return TYPE_MODE (type);
7347 }
7348 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7349 {
7350 static bool warnedavx;
7351 static bool warnedavx_ret;
7352
7353 if (cum && cum->warn_avx && !warnedavx)
7354 {
7355 if (warning (OPT_Wpsabi, "AVX vector argument "
7356 "without AVX enabled changes the ABI"))
7357 warnedavx = true;
7358 }
7359 else if (in_return && !warnedavx_ret)
7360 {
7361 if (warning (OPT_Wpsabi, "AVX vector return "
7362 "without AVX enabled changes the ABI"))
7363 warnedavx_ret = true;
7364 }
7365
7366 return TYPE_MODE (type);
7367 }
7368 else if (((size == 8 && TARGET_64BIT) || size == 16)
7369 && !TARGET_SSE
7370 && !TARGET_IAMCU)
7371 {
7372 static bool warnedsse;
7373 static bool warnedsse_ret;
7374
7375 if (cum && cum->warn_sse && !warnedsse)
7376 {
7377 if (warning (OPT_Wpsabi, "SSE vector argument "
7378 "without SSE enabled changes the ABI"))
7379 warnedsse = true;
7380 }
7381 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7382 {
7383 if (warning (OPT_Wpsabi, "SSE vector return "
7384 "without SSE enabled changes the ABI"))
7385 warnedsse_ret = true;
7386 }
7387 }
7388 else if ((size == 8 && !TARGET_64BIT)
7389 && (!cfun
7390 || cfun->machine->func_type == TYPE_NORMAL)
7391 && !TARGET_MMX
7392 && !TARGET_IAMCU)
7393 {
7394 static bool warnedmmx;
7395 static bool warnedmmx_ret;
7396
7397 if (cum && cum->warn_mmx && !warnedmmx)
7398 {
7399 if (warning (OPT_Wpsabi, "MMX vector argument "
7400 "without MMX enabled changes the ABI"))
7401 warnedmmx = true;
7402 }
7403 else if (in_return && !warnedmmx_ret)
7404 {
7405 if (warning (OPT_Wpsabi, "MMX vector return "
7406 "without MMX enabled changes the ABI"))
7407 warnedmmx_ret = true;
7408 }
7409 }
7410 return mode;
7411 }
7412
7413 gcc_unreachable ();
7414 }
7415 }
7416
7417 return mode;
7418 }
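
/* Worked example for the classification above: given

     typedef float v8sf __attribute__ ((vector_size (32)));

   the natural mode is V8SFmode.  With -mavx that is what the function
   yields, so the argument can live in a YMM register; with -mno-avx the
   original TYPE_MODE is returned instead and a single -Wpsabi note about
   the AVX ABI change is emitted, as coded above.  */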
7419
7420 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7421 this may not agree with the mode that the type system has chosen for the
7422 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7423 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7424
7425 static rtx
7426 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7427 unsigned int regno)
7428 {
7429 rtx tmp;
7430
7431 if (orig_mode != BLKmode)
7432 tmp = gen_rtx_REG (orig_mode, regno);
7433 else
7434 {
7435 tmp = gen_rtx_REG (mode, regno);
7436 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7437 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7438 }
7439
7440 return tmp;
7441 }
7442
7443 /* x86-64 register passing implementation. See the x86-64 ABI for details.
7444 The goal of this code is to classify each eightbyte of an incoming argument
7445 by register class and assign registers accordingly. */
7446
7447 /* Return the union class of CLASS1 and CLASS2.
7448 See the x86-64 PS ABI for details. */
7449
7450 static enum x86_64_reg_class
7451 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7452 {
7453 /* Rule #1: If both classes are equal, this is the resulting class. */
7454 if (class1 == class2)
7455 return class1;
7456
7457 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7458 the other class. */
7459 if (class1 == X86_64_NO_CLASS)
7460 return class2;
7461 if (class2 == X86_64_NO_CLASS)
7462 return class1;
7463
7464 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7465 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7466 return X86_64_MEMORY_CLASS;
7467
7468 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7469 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7470 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7471 return X86_64_INTEGERSI_CLASS;
7472 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7473 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7474 return X86_64_INTEGER_CLASS;
7475
7476 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7477 MEMORY is used. */
7478 if (class1 == X86_64_X87_CLASS
7479 || class1 == X86_64_X87UP_CLASS
7480 || class1 == X86_64_COMPLEX_X87_CLASS
7481 || class2 == X86_64_X87_CLASS
7482 || class2 == X86_64_X87UP_CLASS
7483 || class2 == X86_64_COMPLEX_X87_CLASS)
7484 return X86_64_MEMORY_CLASS;
7485
7486 /* Rule #6: Otherwise class SSE is used. */
7487 return X86_64_SSE_CLASS;
7488 }
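
/* Worked example of the merge rules above: in struct { int i; float f; }
   both fields land in the same eightbyte; the int contributes
   X86_64_INTEGERSI_CLASS, the float (at a nonzero offset) X86_64_SSE_CLASS,
   and rule #4 merges them to an integer class, so the struct is passed in
   one general-purpose register.  A union of float and long double instead
   hits rule #5 (X87 classes) and is forced into memory.  */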
7489
7490 /* Classify the argument of type TYPE and mode MODE.
7491 CLASSES will be filled by the register class used to pass each word
7492 of the operand. The number of words is returned. In case the parameter
7493 should be passed in memory, 0 is returned. As a special case for zero
7494 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7495
7496 BIT_OFFSET is used internally for handling records and specifies offset
7497 of the offset in bits modulo 512 to avoid overflow cases.
7498
7499 See the x86-64 PS ABI for details.
7500 */
7501
7502 static int
7503 classify_argument (machine_mode mode, const_tree type,
7504 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7505 {
7506 HOST_WIDE_INT bytes =
7507 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7508 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7509
7510 /* Variable sized entities are always passed/returned in memory. */
7511 if (bytes < 0)
7512 return 0;
7513
7514 if (mode != VOIDmode
7515 && targetm.calls.must_pass_in_stack (mode, type))
7516 return 0;
7517
7518 if (type && AGGREGATE_TYPE_P (type))
7519 {
7520 int i;
7521 tree field;
7522 enum x86_64_reg_class subclasses[MAX_CLASSES];
7523
7524 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7525 if (bytes > 64)
7526 return 0;
7527
7528 for (i = 0; i < words; i++)
7529 classes[i] = X86_64_NO_CLASS;
7530
7531 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
7532 signal the memory class, so handle it as a special case. */
7533 if (!words)
7534 {
7535 classes[0] = X86_64_NO_CLASS;
7536 return 1;
7537 }
7538
7539 /* Classify each field of record and merge classes. */
7540 switch (TREE_CODE (type))
7541 {
7542 case RECORD_TYPE:
7543 /* And now merge the fields of structure. */
7544 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7545 {
7546 if (TREE_CODE (field) == FIELD_DECL)
7547 {
7548 int num;
7549
7550 if (TREE_TYPE (field) == error_mark_node)
7551 continue;
7552
7553 /* Bitfields are always classified as integer. Handle them
7554 early, since later code would consider them to be
7555 misaligned integers. */
7556 if (DECL_BIT_FIELD (field))
7557 {
7558 for (i = (int_bit_position (field)
7559 + (bit_offset % 64)) / 8 / 8;
7560 i < ((int_bit_position (field) + (bit_offset % 64))
7561 + tree_to_shwi (DECL_SIZE (field))
7562 + 63) / 8 / 8; i++)
7563 classes[i] =
7564 merge_classes (X86_64_INTEGER_CLASS,
7565 classes[i]);
7566 }
7567 else
7568 {
7569 int pos;
7570
7571 type = TREE_TYPE (field);
7572
7573 /* Flexible array member is ignored. */
7574 if (TYPE_MODE (type) == BLKmode
7575 && TREE_CODE (type) == ARRAY_TYPE
7576 && TYPE_SIZE (type) == NULL_TREE
7577 && TYPE_DOMAIN (type) != NULL_TREE
7578 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7579 == NULL_TREE))
7580 {
7581 static bool warned;
7582
7583 if (!warned && warn_psabi)
7584 {
7585 warned = true;
7586 inform (input_location,
7587 "the ABI of passing struct with"
7588 " a flexible array member has"
7589 " changed in GCC 4.4");
7590 }
7591 continue;
7592 }
7593 num = classify_argument (TYPE_MODE (type), type,
7594 subclasses,
7595 (int_bit_position (field)
7596 + bit_offset) % 512);
7597 if (!num)
7598 return 0;
7599 pos = (int_bit_position (field)
7600 + (bit_offset % 64)) / 8 / 8;
7601 for (i = 0; i < num && (i + pos) < words; i++)
7602 classes[i + pos] =
7603 merge_classes (subclasses[i], classes[i + pos]);
7604 }
7605 }
7606 }
7607 break;
7608
7609 case ARRAY_TYPE:
7610 /* Arrays are handled as small records. */
7611 {
7612 int num;
7613 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
7614 TREE_TYPE (type), subclasses, bit_offset);
7615 if (!num)
7616 return 0;
7617
7618 /* The partial classes are now full classes. */
7619 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
7620 subclasses[0] = X86_64_SSE_CLASS;
7621 if (subclasses[0] == X86_64_INTEGERSI_CLASS
7622 && !((bit_offset % 64) == 0 && bytes == 4))
7623 subclasses[0] = X86_64_INTEGER_CLASS;
7624
7625 for (i = 0; i < words; i++)
7626 classes[i] = subclasses[i % num];
7627
7628 break;
7629 }
7630 case UNION_TYPE:
7631 case QUAL_UNION_TYPE:
7632 /* Unions are similar to RECORD_TYPE but the offset is always
7633 zero. */
7634 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7635 {
7636 if (TREE_CODE (field) == FIELD_DECL)
7637 {
7638 int num;
7639
7640 if (TREE_TYPE (field) == error_mark_node)
7641 continue;
7642
7643 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7644 TREE_TYPE (field), subclasses,
7645 bit_offset);
7646 if (!num)
7647 return 0;
7648 for (i = 0; i < num && i < words; i++)
7649 classes[i] = merge_classes (subclasses[i], classes[i]);
7650 }
7651 }
7652 break;
7653
7654 default:
7655 gcc_unreachable ();
7656 }
7657
7658 if (words > 2)
7659 {
7660 /* When size > 16 bytes, if the first class isn't
7661 X86_64_SSE_CLASS or any of the remaining classes isn't
7662 X86_64_SSEUP_CLASS, everything should be passed in
7663 memory. */
7664 if (classes[0] != X86_64_SSE_CLASS)
7665 return 0;
7666
7667 for (i = 1; i < words; i++)
7668 if (classes[i] != X86_64_SSEUP_CLASS)
7669 return 0;
7670 }
7671
7672 /* Final merger cleanup. */
7673 for (i = 0; i < words; i++)
7674 {
7675 /* If one class is MEMORY, everything should be passed in
7676 memory. */
7677 if (classes[i] == X86_64_MEMORY_CLASS)
7678 return 0;
7679
7680 /* X86_64_SSEUP_CLASS should always be preceded by
7681 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7682 if (classes[i] == X86_64_SSEUP_CLASS
7683 && classes[i - 1] != X86_64_SSE_CLASS
7684 && classes[i - 1] != X86_64_SSEUP_CLASS)
7685 {
7686 /* The first one should never be X86_64_SSEUP_CLASS. */
7687 gcc_assert (i != 0);
7688 classes[i] = X86_64_SSE_CLASS;
7689 }
7690
7691 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7692 everything should be passed in memory. */
7693 if (classes[i] == X86_64_X87UP_CLASS
7694 && (classes[i - 1] != X86_64_X87_CLASS))
7695 {
7696 static bool warned;
7697
7698 /* The first one should never be X86_64_X87UP_CLASS. */
7699 gcc_assert (i != 0);
7700 if (!warned && warn_psabi)
7701 {
7702 warned = true;
7703 inform (input_location,
7704 "the ABI of passing union with long double"
7705 " has changed in GCC 4.4");
7706 }
7707 return 0;
7708 }
7709 }
7710 return words;
7711 }
7712
7713 /* Compute alignment needed. We align all types to natural boundaries with
7714 exception of XFmode that is aligned to 64bits. */
7715 if (mode != VOIDmode && mode != BLKmode)
7716 {
7717 int mode_alignment = GET_MODE_BITSIZE (mode);
7718
7719 if (mode == XFmode)
7720 mode_alignment = 128;
7721 else if (mode == XCmode)
7722 mode_alignment = 256;
7723 if (COMPLEX_MODE_P (mode))
7724 mode_alignment /= 2;
7725 /* Misaligned fields are always returned in memory. */
7726 if (bit_offset % mode_alignment)
7727 return 0;
7728 }
7729
7730 /* for V1xx modes, just use the base mode */
7731 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7732 && GET_MODE_UNIT_SIZE (mode) == bytes)
7733 mode = GET_MODE_INNER (mode);
7734
7735 /* Classification of atomic types. */
7736 switch (mode)
7737 {
7738 case E_SDmode:
7739 case E_DDmode:
7740 classes[0] = X86_64_SSE_CLASS;
7741 return 1;
7742 case E_TDmode:
7743 classes[0] = X86_64_SSE_CLASS;
7744 classes[1] = X86_64_SSEUP_CLASS;
7745 return 2;
7746 case E_DImode:
7747 case E_SImode:
7748 case E_HImode:
7749 case E_QImode:
7750 case E_CSImode:
7751 case E_CHImode:
7752 case E_CQImode:
7753 {
7754 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7755
7756 /* Analyze last 128 bits only. */
7757 size = (size - 1) & 0x7f;
7758
7759 if (size < 32)
7760 {
7761 classes[0] = X86_64_INTEGERSI_CLASS;
7762 return 1;
7763 }
7764 else if (size < 64)
7765 {
7766 classes[0] = X86_64_INTEGER_CLASS;
7767 return 1;
7768 }
7769 else if (size < 64+32)
7770 {
7771 classes[0] = X86_64_INTEGER_CLASS;
7772 classes[1] = X86_64_INTEGERSI_CLASS;
7773 return 2;
7774 }
7775 else if (size < 64+64)
7776 {
7777 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7778 return 2;
7779 }
7780 else
7781 gcc_unreachable ();
7782 }
7783 case E_CDImode:
7784 case E_TImode:
7785 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7786 return 2;
7787 case E_COImode:
7788 case E_OImode:
7789 /* OImode shouldn't be used directly. */
7790 gcc_unreachable ();
7791 case E_CTImode:
7792 return 0;
7793 case E_SFmode:
7794 if (!(bit_offset % 64))
7795 classes[0] = X86_64_SSESF_CLASS;
7796 else
7797 classes[0] = X86_64_SSE_CLASS;
7798 return 1;
7799 case E_DFmode:
7800 classes[0] = X86_64_SSEDF_CLASS;
7801 return 1;
7802 case E_XFmode:
7803 classes[0] = X86_64_X87_CLASS;
7804 classes[1] = X86_64_X87UP_CLASS;
7805 return 2;
7806 case E_TFmode:
7807 classes[0] = X86_64_SSE_CLASS;
7808 classes[1] = X86_64_SSEUP_CLASS;
7809 return 2;
7810 case E_SCmode:
7811 classes[0] = X86_64_SSE_CLASS;
7812 if (!(bit_offset % 64))
7813 return 1;
7814 else
7815 {
7816 static bool warned;
7817
7818 if (!warned && warn_psabi)
7819 {
7820 warned = true;
7821 inform (input_location,
7822 "the ABI of passing structure with complex float"
7823 " member has changed in GCC 4.4");
7824 }
7825 classes[1] = X86_64_SSESF_CLASS;
7826 return 2;
7827 }
7828 case E_DCmode:
7829 classes[0] = X86_64_SSEDF_CLASS;
7830 classes[1] = X86_64_SSEDF_CLASS;
7831 return 2;
7832 case E_XCmode:
7833 classes[0] = X86_64_COMPLEX_X87_CLASS;
7834 return 1;
7835 case E_TCmode:
7836 /* This mode is larger than 16 bytes. */
7837 return 0;
7838 case E_V8SFmode:
7839 case E_V8SImode:
7840 case E_V32QImode:
7841 case E_V16HImode:
7842 case E_V4DFmode:
7843 case E_V4DImode:
7844 classes[0] = X86_64_SSE_CLASS;
7845 classes[1] = X86_64_SSEUP_CLASS;
7846 classes[2] = X86_64_SSEUP_CLASS;
7847 classes[3] = X86_64_SSEUP_CLASS;
7848 return 4;
7849 case E_V8DFmode:
7850 case E_V16SFmode:
7851 case E_V8DImode:
7852 case E_V16SImode:
7853 case E_V32HImode:
7854 case E_V64QImode:
7855 classes[0] = X86_64_SSE_CLASS;
7856 classes[1] = X86_64_SSEUP_CLASS;
7857 classes[2] = X86_64_SSEUP_CLASS;
7858 classes[3] = X86_64_SSEUP_CLASS;
7859 classes[4] = X86_64_SSEUP_CLASS;
7860 classes[5] = X86_64_SSEUP_CLASS;
7861 classes[6] = X86_64_SSEUP_CLASS;
7862 classes[7] = X86_64_SSEUP_CLASS;
7863 return 8;
7864 case E_V4SFmode:
7865 case E_V4SImode:
7866 case E_V16QImode:
7867 case E_V8HImode:
7868 case E_V2DFmode:
7869 case E_V2DImode:
7870 classes[0] = X86_64_SSE_CLASS;
7871 classes[1] = X86_64_SSEUP_CLASS;
7872 return 2;
7873 case E_V1TImode:
7874 case E_V1DImode:
7875 case E_V2SFmode:
7876 case E_V2SImode:
7877 case E_V4HImode:
7878 case E_V8QImode:
7879 classes[0] = X86_64_SSE_CLASS;
7880 return 1;
7881 case E_BLKmode:
7882 case E_VOIDmode:
7883 return 0;
7884 default:
7885 gcc_assert (VECTOR_MODE_P (mode));
7886
7887 if (bytes > 16)
7888 return 0;
7889
7890 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7891
7892 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7893 classes[0] = X86_64_INTEGERSI_CLASS;
7894 else
7895 classes[0] = X86_64_INTEGER_CLASS;
7896 classes[1] = X86_64_INTEGER_CLASS;
7897 return 1 + (bytes > 8);
7898 }
7899 }
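
/* Worked examples for the classification above: struct { long a; double b; }
   occupies two eightbytes and yields classes { INTEGER, SSEDF }, so it is
   split between a general-purpose and an SSE register; __int128 yields
   { INTEGER, INTEGER }; and struct { long a; long double b; } is 32 bytes
   whose first class is not SSE, so the size > 16 check above sends it to
   memory (the function returns 0).  */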
7900
7901 /* Examine the argument and set the number of registers required in each
7902 class. Return true iff the parameter should be passed in memory. */
7903
7904 static bool
7905 examine_argument (machine_mode mode, const_tree type, int in_return,
7906 int *int_nregs, int *sse_nregs)
7907 {
7908 enum x86_64_reg_class regclass[MAX_CLASSES];
7909 int n = classify_argument (mode, type, regclass, 0);
7910
7911 *int_nregs = 0;
7912 *sse_nregs = 0;
7913
7914 if (!n)
7915 return true;
7916 for (n--; n >= 0; n--)
7917 switch (regclass[n])
7918 {
7919 case X86_64_INTEGER_CLASS:
7920 case X86_64_INTEGERSI_CLASS:
7921 (*int_nregs)++;
7922 break;
7923 case X86_64_SSE_CLASS:
7924 case X86_64_SSESF_CLASS:
7925 case X86_64_SSEDF_CLASS:
7926 (*sse_nregs)++;
7927 break;
7928 case X86_64_NO_CLASS:
7929 case X86_64_SSEUP_CLASS:
7930 break;
7931 case X86_64_X87_CLASS:
7932 case X86_64_X87UP_CLASS:
7933 case X86_64_COMPLEX_X87_CLASS:
7934 if (!in_return)
7935 return true;
7936 break;
7937 case X86_64_MEMORY_CLASS:
7938 gcc_unreachable ();
7939 }
7940
7941 return false;
7942 }
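
/* Example of the bookkeeping above: for struct { double x, y; } the classes
   are { SSEDF, SSEDF }, so *int_nregs is 0, *sse_nregs is 2 and the struct
   needs two SSE registers; a long double argument (X87/X87UP) makes the
   function return true, i.e. it is passed in memory, while a long double
   return value is still allowed on the x87 stack.  */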
7943
7944 /* Construct container for the argument used by GCC interface. See
7945 FUNCTION_ARG for the detailed description. */
7946
7947 static rtx
7948 construct_container (machine_mode mode, machine_mode orig_mode,
7949 const_tree type, int in_return, int nintregs, int nsseregs,
7950 const int *intreg, int sse_regno)
7951 {
7952 /* The following variables hold the static issued_error state. */
7953 static bool issued_sse_arg_error;
7954 static bool issued_sse_ret_error;
7955 static bool issued_x87_ret_error;
7956
7957 machine_mode tmpmode;
7958 int bytes =
7959 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7960 enum x86_64_reg_class regclass[MAX_CLASSES];
7961 int n;
7962 int i;
7963 int nexps = 0;
7964 int needed_sseregs, needed_intregs;
7965 rtx exp[MAX_CLASSES];
7966 rtx ret;
7967
7968 n = classify_argument (mode, type, regclass, 0);
7969 if (!n)
7970 return NULL;
7971 if (examine_argument (mode, type, in_return, &needed_intregs,
7972 &needed_sseregs))
7973 return NULL;
7974 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7975 return NULL;
7976
7977 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7978 some less clueful developer tries to use floating-point anyway. */
7979 if (needed_sseregs && !TARGET_SSE)
7980 {
7981 if (in_return)
7982 {
7983 if (!issued_sse_ret_error)
7984 {
7985 error ("SSE register return with SSE disabled");
7986 issued_sse_ret_error = true;
7987 }
7988 }
7989 else if (!issued_sse_arg_error)
7990 {
7991 error ("SSE register argument with SSE disabled");
7992 issued_sse_arg_error = true;
7993 }
7994 return NULL;
7995 }
7996
7997 /* Likewise, error if the ABI requires us to return values in the
7998 x87 registers and the user specified -mno-80387. */
7999 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8000 for (i = 0; i < n; i++)
8001 if (regclass[i] == X86_64_X87_CLASS
8002 || regclass[i] == X86_64_X87UP_CLASS
8003 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8004 {
8005 if (!issued_x87_ret_error)
8006 {
8007 error ("x87 register return with x87 disabled");
8008 issued_x87_ret_error = true;
8009 }
8010 return NULL;
8011 }
8012
8013 /* First construct simple cases. Avoid SCmode, since we want to use
8014 a single register to pass this type. */
8015 if (n == 1 && mode != SCmode)
8016 switch (regclass[0])
8017 {
8018 case X86_64_INTEGER_CLASS:
8019 case X86_64_INTEGERSI_CLASS:
8020 return gen_rtx_REG (mode, intreg[0]);
8021 case X86_64_SSE_CLASS:
8022 case X86_64_SSESF_CLASS:
8023 case X86_64_SSEDF_CLASS:
8024 if (mode != BLKmode)
8025 return gen_reg_or_parallel (mode, orig_mode,
8026 SSE_REGNO (sse_regno));
8027 break;
8028 case X86_64_X87_CLASS:
8029 case X86_64_COMPLEX_X87_CLASS:
8030 return gen_rtx_REG (mode, FIRST_STACK_REG);
8031 case X86_64_NO_CLASS:
8032 /* Zero sized array, struct or class. */
8033 return NULL;
8034 default:
8035 gcc_unreachable ();
8036 }
8037 if (n == 2
8038 && regclass[0] == X86_64_SSE_CLASS
8039 && regclass[1] == X86_64_SSEUP_CLASS
8040 && mode != BLKmode)
8041 return gen_reg_or_parallel (mode, orig_mode,
8042 SSE_REGNO (sse_regno));
8043 if (n == 4
8044 && regclass[0] == X86_64_SSE_CLASS
8045 && regclass[1] == X86_64_SSEUP_CLASS
8046 && regclass[2] == X86_64_SSEUP_CLASS
8047 && regclass[3] == X86_64_SSEUP_CLASS
8048 && mode != BLKmode)
8049 return gen_reg_or_parallel (mode, orig_mode,
8050 SSE_REGNO (sse_regno));
8051 if (n == 8
8052 && regclass[0] == X86_64_SSE_CLASS
8053 && regclass[1] == X86_64_SSEUP_CLASS
8054 && regclass[2] == X86_64_SSEUP_CLASS
8055 && regclass[3] == X86_64_SSEUP_CLASS
8056 && regclass[4] == X86_64_SSEUP_CLASS
8057 && regclass[5] == X86_64_SSEUP_CLASS
8058 && regclass[6] == X86_64_SSEUP_CLASS
8059 && regclass[7] == X86_64_SSEUP_CLASS
8060 && mode != BLKmode)
8061 return gen_reg_or_parallel (mode, orig_mode,
8062 SSE_REGNO (sse_regno));
8063 if (n == 2
8064 && regclass[0] == X86_64_X87_CLASS
8065 && regclass[1] == X86_64_X87UP_CLASS)
8066 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8067
8068 if (n == 2
8069 && regclass[0] == X86_64_INTEGER_CLASS
8070 && regclass[1] == X86_64_INTEGER_CLASS
8071 && (mode == CDImode || mode == TImode)
8072 && intreg[0] + 1 == intreg[1])
8073 return gen_rtx_REG (mode, intreg[0]);
8074
8075 /* Otherwise figure out the entries of the PARALLEL. */
8076 for (i = 0; i < n; i++)
8077 {
8078 int pos;
8079
8080 switch (regclass[i])
8081 {
8082 case X86_64_NO_CLASS:
8083 break;
8084 case X86_64_INTEGER_CLASS:
8085 case X86_64_INTEGERSI_CLASS:
8086 /* Merge TImodes on aligned occasions here too. */
8087 if (i * 8 + 8 > bytes)
8088 {
8089 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
8090 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
8091 /* We've requested 24 bytes for which we
8092 don't have a mode. Use DImode. */
8093 tmpmode = DImode;
8094 }
8095 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8096 tmpmode = SImode;
8097 else
8098 tmpmode = DImode;
8099 exp [nexps++]
8100 = gen_rtx_EXPR_LIST (VOIDmode,
8101 gen_rtx_REG (tmpmode, *intreg),
8102 GEN_INT (i*8));
8103 intreg++;
8104 break;
8105 case X86_64_SSESF_CLASS:
8106 exp [nexps++]
8107 = gen_rtx_EXPR_LIST (VOIDmode,
8108 gen_rtx_REG (SFmode,
8109 SSE_REGNO (sse_regno)),
8110 GEN_INT (i*8));
8111 sse_regno++;
8112 break;
8113 case X86_64_SSEDF_CLASS:
8114 exp [nexps++]
8115 = gen_rtx_EXPR_LIST (VOIDmode,
8116 gen_rtx_REG (DFmode,
8117 SSE_REGNO (sse_regno)),
8118 GEN_INT (i*8));
8119 sse_regno++;
8120 break;
8121 case X86_64_SSE_CLASS:
8122 pos = i;
8123 switch (n)
8124 {
8125 case 1:
8126 tmpmode = DImode;
8127 break;
8128 case 2:
8129 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8130 {
8131 tmpmode = TImode;
8132 i++;
8133 }
8134 else
8135 tmpmode = DImode;
8136 break;
8137 case 4:
8138 gcc_assert (i == 0
8139 && regclass[1] == X86_64_SSEUP_CLASS
8140 && regclass[2] == X86_64_SSEUP_CLASS
8141 && regclass[3] == X86_64_SSEUP_CLASS);
8142 tmpmode = OImode;
8143 i += 3;
8144 break;
8145 case 8:
8146 gcc_assert (i == 0
8147 && regclass[1] == X86_64_SSEUP_CLASS
8148 && regclass[2] == X86_64_SSEUP_CLASS
8149 && regclass[3] == X86_64_SSEUP_CLASS
8150 && regclass[4] == X86_64_SSEUP_CLASS
8151 && regclass[5] == X86_64_SSEUP_CLASS
8152 && regclass[6] == X86_64_SSEUP_CLASS
8153 && regclass[7] == X86_64_SSEUP_CLASS);
8154 tmpmode = XImode;
8155 i += 7;
8156 break;
8157 default:
8158 gcc_unreachable ();
8159 }
8160 exp [nexps++]
8161 = gen_rtx_EXPR_LIST (VOIDmode,
8162 gen_rtx_REG (tmpmode,
8163 SSE_REGNO (sse_regno)),
8164 GEN_INT (pos*8));
8165 sse_regno++;
8166 break;
8167 default:
8168 gcc_unreachable ();
8169 }
8170 }
8171
8172 /* Empty aligned struct, union or class. */
8173 if (nexps == 0)
8174 return NULL;
8175
8176 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8177 for (i = 0; i < nexps; i++)
8178 XVECEXP (ret, 0, i) = exp [i];
8179 return ret;
8180 }
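
/* Rough shape of the result for struct { long a; double b; } when the first
   integer and SSE argument registers are still free: the classes give
   { INTEGER, SSEDF }, so the PARALLEL built above is roughly

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte travels in RDI and the second in XMM0.  */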
8181
8182 /* Update the data in CUM to advance over an argument of mode MODE
8183 and data type TYPE. (TYPE is null for libcalls where that information
8184 may not be available.)
8185
8186 Return the number of integer registers advanced over. */
8187
8188 static int
8189 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8190 const_tree type, HOST_WIDE_INT bytes,
8191 HOST_WIDE_INT words)
8192 {
8193 int res = 0;
8194 bool error_p = false;
8195
8196 if (TARGET_IAMCU)
8197 {
8198 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8199 bytes in registers. */
8200 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8201 goto pass_in_reg;
8202 return res;
8203 }
8204
8205 switch (mode)
8206 {
8207 default:
8208 break;
8209
8210 case E_BLKmode:
8211 if (bytes < 0)
8212 break;
8213 /* FALLTHRU */
8214
8215 case E_DImode:
8216 case E_SImode:
8217 case E_HImode:
8218 case E_QImode:
8219 pass_in_reg:
8220 cum->words += words;
8221 cum->nregs -= words;
8222 cum->regno += words;
8223 if (cum->nregs >= 0)
8224 res = words;
8225 if (cum->nregs <= 0)
8226 {
8227 cum->nregs = 0;
8228 cfun->machine->arg_reg_available = false;
8229 cum->regno = 0;
8230 }
8231 break;
8232
8233 case E_OImode:
8234 /* OImode shouldn't be used directly. */
8235 gcc_unreachable ();
8236
8237 case E_DFmode:
8238 if (cum->float_in_sse == -1)
8239 error_p = true;
8240 if (cum->float_in_sse < 2)
8241 break;
8242 /* FALLTHRU */
8243 case E_SFmode:
8244 if (cum->float_in_sse == -1)
8245 error_p = true;
8246 if (cum->float_in_sse < 1)
8247 break;
8248 /* FALLTHRU */
8249
8250 case E_V8SFmode:
8251 case E_V8SImode:
8252 case E_V64QImode:
8253 case E_V32HImode:
8254 case E_V16SImode:
8255 case E_V8DImode:
8256 case E_V16SFmode:
8257 case E_V8DFmode:
8258 case E_V32QImode:
8259 case E_V16HImode:
8260 case E_V4DFmode:
8261 case E_V4DImode:
8262 case E_TImode:
8263 case E_V16QImode:
8264 case E_V8HImode:
8265 case E_V4SImode:
8266 case E_V2DImode:
8267 case E_V4SFmode:
8268 case E_V2DFmode:
8269 if (!type || !AGGREGATE_TYPE_P (type))
8270 {
8271 cum->sse_words += words;
8272 cum->sse_nregs -= 1;
8273 cum->sse_regno += 1;
8274 if (cum->sse_nregs <= 0)
8275 {
8276 cum->sse_nregs = 0;
8277 cum->sse_regno = 0;
8278 }
8279 }
8280 break;
8281
8282 case E_V8QImode:
8283 case E_V4HImode:
8284 case E_V2SImode:
8285 case E_V2SFmode:
8286 case E_V1TImode:
8287 case E_V1DImode:
8288 if (!type || !AGGREGATE_TYPE_P (type))
8289 {
8290 cum->mmx_words += words;
8291 cum->mmx_nregs -= 1;
8292 cum->mmx_regno += 1;
8293 if (cum->mmx_nregs <= 0)
8294 {
8295 cum->mmx_nregs = 0;
8296 cum->mmx_regno = 0;
8297 }
8298 }
8299 break;
8300 }
8301 if (error_p)
8302 {
8303 cum->float_in_sse = 0;
8304 error ("calling %qD with SSE calling convention without "
8305 "SSE/SSE2 enabled", cum->decl);
8306 sorry ("this is a GCC bug that can be worked around by adding "
8307 "attribute used to function called");
8308 }
8309
8310 return res;
8311 }
8312
8313 static int
8314 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8315 const_tree type, HOST_WIDE_INT words, bool named)
8316 {
8317 int int_nregs, sse_nregs;
8318
8319 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8320 if (!named && (VALID_AVX512F_REG_MODE (mode)
8321 || VALID_AVX256_REG_MODE (mode)))
8322 return 0;
8323
8324 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8325 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8326 {
8327 cum->nregs -= int_nregs;
8328 cum->sse_nregs -= sse_nregs;
8329 cum->regno += int_nregs;
8330 cum->sse_regno += sse_nregs;
8331 return int_nregs;
8332 }
8333 else
8334 {
8335 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8336 cum->words = ROUND_UP (cum->words, align);
8337 cum->words += words;
8338 return 0;
8339 }
8340 }
8341
8342 static int
8343 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8344 HOST_WIDE_INT words)
8345 {
8346 /* Otherwise, this should be passed indirect. */
8347 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8348
8349 cum->words += words;
8350 if (cum->nregs > 0)
8351 {
8352 cum->nregs -= 1;
8353 cum->regno += 1;
8354 return 1;
8355 }
8356 return 0;
8357 }
8358
8359 /* Update the data in CUM to advance over an argument of mode MODE and
8360 data type TYPE. (TYPE is null for libcalls where that information
8361 may not be available.) */
8362
8363 static void
8364 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8365 const_tree type, bool named)
8366 {
8367 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8368 HOST_WIDE_INT bytes, words;
8369 int nregs;
8370
8371 /* The argument of interrupt handler is a special case and is
8372 handled in ix86_function_arg. */
8373 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8374 return;
8375
8376 if (mode == BLKmode)
8377 bytes = int_size_in_bytes (type);
8378 else
8379 bytes = GET_MODE_SIZE (mode);
8380 words = CEIL (bytes, UNITS_PER_WORD);
8381
8382 if (type)
8383 mode = type_natural_mode (type, NULL, false);
8384
8385 if ((type && POINTER_BOUNDS_TYPE_P (type))
8386 || POINTER_BOUNDS_MODE_P (mode))
8387 {
8388 /* If we pass bounds in BT then just update the remaining bounds count. */
8389 if (cum->bnds_in_bt)
8390 {
8391 cum->bnds_in_bt--;
8392 return;
8393 }
8394
8395 /* Update the remaining number of bounds to force. */
8396 if (cum->force_bnd_pass)
8397 cum->force_bnd_pass--;
8398
8399 cum->bnd_regno++;
8400
8401 return;
8402 }
8403
8404 /* The first arg not going to Bounds Tables resets this counter. */
8405 cum->bnds_in_bt = 0;
8406 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
8407 the passed and received types do not match. If bounds do not follow an
8408 unnamed arg, still pretend the required number of bounds was passed. */
8409 if (cum->force_bnd_pass)
8410 {
8411 cum->bnd_regno += cum->force_bnd_pass;
8412 cum->force_bnd_pass = 0;
8413 }
8414
8415 if (TARGET_64BIT)
8416 {
8417 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8418
8419 if (call_abi == MS_ABI)
8420 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8421 else
8422 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8423 }
8424 else
8425 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8426
8427 /* For stdarg we expect bounds to be passed for each value passed
8428 in a register. */
8429 if (cum->stdarg)
8430 cum->force_bnd_pass = nregs;
8431 /* For pointers passed in memory we expect bounds to be passed in the
8432 Bounds Table. */
8433 if (!nregs)
8434 {
8435 /* Track if there are outgoing arguments on stack. */
8436 if (cum->caller)
8437 cfun->machine->outgoing_args_on_stack = true;
8438
8439 cum->bnds_in_bt = chkp_type_bounds_count (type);
8440 }
8441 }
8442
8443 /* Define where to put the arguments to a function.
8444 Value is zero to push the argument on the stack,
8445 or a hard register in which to store the argument.
8446
8447 MODE is the argument's machine mode.
8448 TYPE is the data type of the argument (as a tree).
8449 This is null for libcalls where that information may
8450 not be available.
8451 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8452 the preceding args and about the function being called.
8453 NAMED is nonzero if this argument is a named parameter
8454 (otherwise it is an extra parameter matching an ellipsis). */
8455
8456 static rtx
8457 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8458 machine_mode orig_mode, const_tree type,
8459 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8460 {
8461 bool error_p = false;
8462
8463 /* Avoid the AL settings for the Unix64 ABI. */
8464 if (mode == VOIDmode)
8465 return constm1_rtx;
8466
8467 if (TARGET_IAMCU)
8468 {
8469 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8470 bytes in registers. */
8471 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8472 goto pass_in_reg;
8473 return NULL_RTX;
8474 }
8475
8476 switch (mode)
8477 {
8478 default:
8479 break;
8480
8481 case E_BLKmode:
8482 if (bytes < 0)
8483 break;
8484 /* FALLTHRU */
8485 case E_DImode:
8486 case E_SImode:
8487 case E_HImode:
8488 case E_QImode:
8489 pass_in_reg:
8490 if (words <= cum->nregs)
8491 {
8492 int regno = cum->regno;
8493
8494 /* Fastcall allocates the first two DWORD (SImode) or
8495 smaller arguments to ECX and EDX if the argument isn't
8496 an aggregate type. */
8497 if (cum->fastcall)
8498 {
8499 if (mode == BLKmode
8500 || mode == DImode
8501 || (type && AGGREGATE_TYPE_P (type)))
8502 break;
8503
8504 /* ECX, not EAX, is the first allocated register. */
8505 if (regno == AX_REG)
8506 regno = CX_REG;
8507 }
8508 return gen_rtx_REG (mode, regno);
8509 }
8510 break;
8511
8512 case E_DFmode:
8513 if (cum->float_in_sse == -1)
8514 error_p = true;
8515 if (cum->float_in_sse < 2)
8516 break;
8517 /* FALLTHRU */
8518 case E_SFmode:
8519 if (cum->float_in_sse == -1)
8520 error_p = true;
8521 if (cum->float_in_sse < 1)
8522 break;
8523 /* FALLTHRU */
8524 case E_TImode:
8525 /* In 32bit, we pass TImode in xmm registers. */
8526 case E_V16QImode:
8527 case E_V8HImode:
8528 case E_V4SImode:
8529 case E_V2DImode:
8530 case E_V4SFmode:
8531 case E_V2DFmode:
8532 if (!type || !AGGREGATE_TYPE_P (type))
8533 {
8534 if (cum->sse_nregs)
8535 return gen_reg_or_parallel (mode, orig_mode,
8536 cum->sse_regno + FIRST_SSE_REG);
8537 }
8538 break;
8539
8540 case E_OImode:
8541 case E_XImode:
8542 /* OImode and XImode shouldn't be used directly. */
8543 gcc_unreachable ();
8544
8545 case E_V64QImode:
8546 case E_V32HImode:
8547 case E_V16SImode:
8548 case E_V8DImode:
8549 case E_V16SFmode:
8550 case E_V8DFmode:
8551 case E_V8SFmode:
8552 case E_V8SImode:
8553 case E_V32QImode:
8554 case E_V16HImode:
8555 case E_V4DFmode:
8556 case E_V4DImode:
8557 if (!type || !AGGREGATE_TYPE_P (type))
8558 {
8559 if (cum->sse_nregs)
8560 return gen_reg_or_parallel (mode, orig_mode,
8561 cum->sse_regno + FIRST_SSE_REG);
8562 }
8563 break;
8564
8565 case E_V8QImode:
8566 case E_V4HImode:
8567 case E_V2SImode:
8568 case E_V2SFmode:
8569 case E_V1TImode:
8570 case E_V1DImode:
8571 if (!type || !AGGREGATE_TYPE_P (type))
8572 {
8573 if (cum->mmx_nregs)
8574 return gen_reg_or_parallel (mode, orig_mode,
8575 cum->mmx_regno + FIRST_MMX_REG);
8576 }
8577 break;
8578 }
8579 if (error_p)
8580 {
8581 cum->float_in_sse = 0;
8582 error ("calling %qD with SSE calling convention without "
8583 "SSE/SSE2 enabled", cum->decl);
8584 sorry ("this is a GCC bug that can be worked around by adding "
8585 "attribute used to function called");
8586 }
8587
8588 return NULL_RTX;
8589 }
8590
8591 static rtx
8592 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8593 machine_mode orig_mode, const_tree type, bool named)
8594 {
8595 /* Handle a hidden AL argument containing the number of SSE registers
8596 used for varargs x86-64 functions. */
8597 if (mode == VOIDmode)
8598 return GEN_INT (cum->maybe_vaarg
8599 ? (cum->sse_nregs < 0
8600 ? X86_64_SSE_REGPARM_MAX
8601 : cum->sse_regno)
8602 : -1);
8603
8604 switch (mode)
8605 {
8606 default:
8607 break;
8608
8609 case E_V8SFmode:
8610 case E_V8SImode:
8611 case E_V32QImode:
8612 case E_V16HImode:
8613 case E_V4DFmode:
8614 case E_V4DImode:
8615 case E_V16SFmode:
8616 case E_V16SImode:
8617 case E_V64QImode:
8618 case E_V32HImode:
8619 case E_V8DFmode:
8620 case E_V8DImode:
8621 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
8622 if (!named)
8623 return NULL;
8624 break;
8625 }
8626
8627 return construct_container (mode, orig_mode, type, 0, cum->nregs,
8628 cum->sse_nregs,
8629 &x86_64_int_parameter_registers [cum->regno],
8630 cum->sse_regno);
8631 }
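
/* Informal note on the VOIDmode case above: for a variadic SysV call the
   value returned there is the number of SSE registers actually used by the
   arguments, and the call expander loads it into AL before the call; e.g.
   printf ("%f", 1.0) sets AL to 1, while a call passing no floating-point
   arguments sets AL to 0.  */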
8632
8633 static rtx
8634 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8635 machine_mode orig_mode, bool named,
8636 HOST_WIDE_INT bytes)
8637 {
8638 unsigned int regno;
8639
8640 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
8641 We use the value -2 to specify that the current function call is MS_ABI. */
8642 if (mode == VOIDmode)
8643 return GEN_INT (-2);
8644
8645 /* If we've run out of registers, it goes on the stack. */
8646 if (cum->nregs == 0)
8647 return NULL_RTX;
8648
8649 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8650
8651 /* Only floating point modes are passed in anything but integer regs. */
8652 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8653 {
8654 if (named)
8655 regno = cum->regno + FIRST_SSE_REG;
8656 else
8657 {
8658 rtx t1, t2;
8659
8660 /* Unnamed floating parameters are passed in both the
8661 SSE and integer registers. */
8662 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8663 t2 = gen_rtx_REG (mode, regno);
8664 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8665 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8666 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8667 }
8668 }
8669 /* Handle aggregate types passed in a register. */
8670 if (orig_mode == BLKmode)
8671 {
8672 if (bytes > 0 && bytes <= 8)
8673 mode = (bytes > 4 ? DImode : SImode);
8674 if (mode == BLKmode)
8675 mode = DImode;
8676 }
8677
8678 return gen_reg_or_parallel (mode, orig_mode, regno);
8679 }
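
/* Illustrative note on the MS ABI scheme above: the register slot is chosen
   by argument position, not by class, so for

     void f (int a, double b, void *c);

   a is passed in ECX, b in XMM1 and c in R8, leaving XMM0 and RDX for those
   positions unused; an unnamed double additionally occupies both XMM1 and
   RDX, as built by the PARALLEL above.  */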
8680
8681 /* Return where to put the arguments to a function.
8682 Return zero to push the argument on the stack, or a hard register in which to store the argument.
8683
8684 MODE is the argument's machine mode. TYPE is the data type of the
8685 argument. It is null for libcalls where that information may not be
8686 available. CUM gives information about the preceding args and about
8687 the function being called. NAMED is nonzero if this argument is a
8688 named parameter (otherwise it is an extra parameter matching an
8689 ellipsis). */
8690
8691 static rtx
8692 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8693 const_tree type, bool named)
8694 {
8695 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8696 machine_mode mode = omode;
8697 HOST_WIDE_INT bytes, words;
8698 rtx arg;
8699
8700 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8701 {
8702 gcc_assert (type != NULL_TREE);
8703 if (POINTER_TYPE_P (type))
8704 {
8705 /* This is the pointer argument. */
8706 gcc_assert (TYPE_MODE (type) == Pmode);
8707 /* It is at -WORD(AP) in the current frame in interrupt and
8708 exception handlers. */
8709 arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
8710 }
8711 else
8712 {
8713 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
8714 && TREE_CODE (type) == INTEGER_TYPE
8715 && TYPE_MODE (type) == word_mode);
8716 /* The error code is the word-mode integer argument at
8717 -2 * WORD(AP) in the current frame of the exception
8718 handler. */
8719 arg = gen_rtx_MEM (word_mode,
8720 plus_constant (Pmode,
8721 arg_pointer_rtx,
8722 -2 * UNITS_PER_WORD));
8723 }
8724 return arg;
8725 }
8726
8727 /* All pointer bounds arguments are handled separately here. */
8728 if ((type && POINTER_BOUNDS_TYPE_P (type))
8729 || POINTER_BOUNDS_MODE_P (mode))
8730 {
8731 /* Return NULL if bounds are forced to go in Bounds Table. */
8732 if (cum->bnds_in_bt)
8733 arg = NULL;
8734 /* Return the next available bound reg if any. */
8735 else if (cum->bnd_regno <= LAST_BND_REG)
8736 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8737 /* Return the next special slot number otherwise. */
8738 else
8739 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8740
8741 return arg;
8742 }
8743
8744 if (mode == BLKmode)
8745 bytes = int_size_in_bytes (type);
8746 else
8747 bytes = GET_MODE_SIZE (mode);
8748 words = CEIL (bytes, UNITS_PER_WORD);
8749
8750 /* To simplify the code below, represent vector types with a vector mode
8751 even if MMX/SSE are not active. */
8752 if (type && TREE_CODE (type) == VECTOR_TYPE)
8753 mode = type_natural_mode (type, cum, false);
8754
8755 if (TARGET_64BIT)
8756 {
8757 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8758
8759 if (call_abi == MS_ABI)
8760 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8761 else
8762 arg = function_arg_64 (cum, mode, omode, type, named);
8763 }
8764 else
8765 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8766
8767 /* Track if there are outgoing arguments on stack. */
8768 if (arg == NULL_RTX && cum->caller)
8769 cfun->machine->outgoing_args_on_stack = true;
8770
8771 return arg;
8772 }
8773
8774 /* A C expression that indicates when an argument must be passed by
8775 reference. If nonzero for an argument, a copy of that argument is
8776 made in memory and a pointer to the argument is passed instead of
8777 the argument itself. The pointer is passed in whatever way is
8778 appropriate for passing a pointer to that type. */
8779
8780 static bool
8781 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8782 const_tree type, bool)
8783 {
8784 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8785
8786 /* Bounds are never passed by reference. */
8787 if ((type && POINTER_BOUNDS_TYPE_P (type))
8788 || POINTER_BOUNDS_MODE_P (mode))
8789 return false;
8790
8791 if (TARGET_64BIT)
8792 {
8793 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8794
8795 /* See Windows x64 Software Convention. */
8796 if (call_abi == MS_ABI)
8797 {
8798 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8799
8800 if (type)
8801 {
8802 /* Arrays are passed by reference. */
8803 if (TREE_CODE (type) == ARRAY_TYPE)
8804 return true;
8805
8806 if (RECORD_OR_UNION_TYPE_P (type))
8807 {
8808 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8809 are passed by reference. */
8810 msize = int_size_in_bytes (type);
8811 }
8812 }
8813
8814 /* __m128 is passed by reference. */
8815 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8816 }
8817 else if (type && int_size_in_bytes (type) == -1)
8818 return true;
8819 }
8820
8821 return false;
8822 }
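
/* Examples of the checks above: under the MS ABI a 16-byte aggregate such
   as struct { double x, y; } (or __m128) is passed by reference because its
   size is not 1, 2, 4 or 8 bytes, while an 8-byte struct { int a, b; } is
   passed by value; on the 64-bit SysV side only variably sized types
   (int_size_in_bytes == -1) take the by-reference path here.  */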
8823
8824 /* Return true when TYPE should be 128bit aligned for 32bit argument
8825 passing ABI. XXX: This function is obsolete and is only used for
8826 checking psABI compatibility with previous versions of GCC. */
8827
8828 static bool
8829 ix86_compat_aligned_value_p (const_tree type)
8830 {
8831 machine_mode mode = TYPE_MODE (type);
8832 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8833 || mode == TDmode
8834 || mode == TFmode
8835 || mode == TCmode)
8836 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8837 return true;
8838 if (TYPE_ALIGN (type) < 128)
8839 return false;
8840
8841 if (AGGREGATE_TYPE_P (type))
8842 {
8843 /* Walk the aggregates recursively. */
8844 switch (TREE_CODE (type))
8845 {
8846 case RECORD_TYPE:
8847 case UNION_TYPE:
8848 case QUAL_UNION_TYPE:
8849 {
8850 tree field;
8851
8852 /* Walk all the structure fields. */
8853 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8854 {
8855 if (TREE_CODE (field) == FIELD_DECL
8856 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8857 return true;
8858 }
8859 break;
8860 }
8861
8862 case ARRAY_TYPE:
8863 /* Just for use if some languages pass arrays by value. */
8864 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8865 return true;
8866 break;
8867
8868 default:
8869 gcc_unreachable ();
8870 }
8871 }
8872 return false;
8873 }
8874
8875 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8876 XXX: This function is obsolete and is only used for checking psABI
8877 compatibility with previous versions of GCC. */
8878
8879 static unsigned int
8880 ix86_compat_function_arg_boundary (machine_mode mode,
8881 const_tree type, unsigned int align)
8882 {
8883 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8884 natural boundaries. */
8885 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8886 {
8887 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8888 make an exception for SSE modes since these require 128bit
8889 alignment.
8890
8891 The handling here differs from field_alignment. ICC aligns MMX
8892 arguments to 4 byte boundaries, while structure fields are aligned
8893 to 8 byte boundaries. */
8894 if (!type)
8895 {
8896 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8897 align = PARM_BOUNDARY;
8898 }
8899 else
8900 {
8901 if (!ix86_compat_aligned_value_p (type))
8902 align = PARM_BOUNDARY;
8903 }
8904 }
8905 if (align > BIGGEST_ALIGNMENT)
8906 align = BIGGEST_ALIGNMENT;
8907 return align;
8908 }
8909
8910 /* Return true when TYPE should be 128bit aligned for 32bit argument
8911 passing ABI. */
8912
8913 static bool
8914 ix86_contains_aligned_value_p (const_tree type)
8915 {
8916 machine_mode mode = TYPE_MODE (type);
8917
8918 if (mode == XFmode || mode == XCmode)
8919 return false;
8920
8921 if (TYPE_ALIGN (type) < 128)
8922 return false;
8923
8924 if (AGGREGATE_TYPE_P (type))
8925 {
8926 /* Walk the aggregates recursively. */
8927 switch (TREE_CODE (type))
8928 {
8929 case RECORD_TYPE:
8930 case UNION_TYPE:
8931 case QUAL_UNION_TYPE:
8932 {
8933 tree field;
8934
8935 /* Walk all the structure fields. */
8936 for (field = TYPE_FIELDS (type);
8937 field;
8938 field = DECL_CHAIN (field))
8939 {
8940 if (TREE_CODE (field) == FIELD_DECL
8941 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8942 return true;
8943 }
8944 break;
8945 }
8946
8947 case ARRAY_TYPE:
8948 /* Just for use if some languages passes arrays by value. */
8949 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8950 return true;
8951 break;
8952
8953 default:
8954 gcc_unreachable ();
8955 }
8956 }
8957 else
8958 return TYPE_ALIGN (type) >= 128;
8959
8960 return false;
8961 }
8962
8963 /* Gives the alignment boundary, in bits, of an argument with the
8964 specified mode and type. */
8965
8966 static unsigned int
8967 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8968 {
8969 unsigned int align;
8970 if (type)
8971 {
8972 /* Since the main variant type is used for the call, convert the
8973 type to its main variant. */
8974 type = TYPE_MAIN_VARIANT (type);
8975 align = TYPE_ALIGN (type);
8976 }
8977 else
8978 align = GET_MODE_ALIGNMENT (mode);
8979 if (align < PARM_BOUNDARY)
8980 align = PARM_BOUNDARY;
8981 else
8982 {
8983 static bool warned;
8984 unsigned int saved_align = align;
8985
8986 if (!TARGET_64BIT)
8987 {
8988 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8989 if (!type)
8990 {
8991 if (mode == XFmode || mode == XCmode)
8992 align = PARM_BOUNDARY;
8993 }
8994 else if (!ix86_contains_aligned_value_p (type))
8995 align = PARM_BOUNDARY;
8996
8997 if (align < 128)
8998 align = PARM_BOUNDARY;
8999 }
9000
9001 if (warn_psabi
9002 && !warned
9003 && align != ix86_compat_function_arg_boundary (mode, type,
9004 saved_align))
9005 {
9006 warned = true;
9007 inform (input_location,
9008 "The ABI for passing parameters with %d-byte"
9009 " alignment has changed in GCC 4.6",
9010 align / BITS_PER_UNIT);
9011 }
9012 }
9013
9014 return align;
9015 }
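
/* For illustration, a few boundaries this function computes, assuming the
   usual PARM_BOUNDARY of 32 bits for ia32 and 64 bits for x86-64:

	ia32:   int, long double (XFmode)  -> 32  (PARM_BOUNDARY)
	ia32:   __m128 (128-bit aligned)   -> 128
	x86-64: int                        -> 64  (PARM_BOUNDARY)
	x86-64: __m256                     -> 256  */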
9016
9017 /* Return true if N is a possible register number of function value. */
9018
9019 static bool
9020 ix86_function_value_regno_p (const unsigned int regno)
9021 {
9022 switch (regno)
9023 {
9024 case AX_REG:
9025 return true;
9026 case DX_REG:
9027 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9028 case DI_REG:
9029 case SI_REG:
9030 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
9031
9032 case BND0_REG:
9033 case BND1_REG:
9034 return chkp_function_instrumented_p (current_function_decl);
9035
9036 /* Complex values are returned in %st(0)/%st(1) pair. */
9037 case ST0_REG:
9038 case ST1_REG:
9039 /* TODO: The function should depend on the current function's ABI, but
9040 builtins.c would need updating then. Therefore we use the
9041 default ABI. */
9042 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9043 return false;
9044 return TARGET_FLOAT_RETURNS_IN_80387;
9045
9046 /* Complex values are returned in %xmm0/%xmm1 pair. */
9047 case XMM0_REG:
9048 case XMM1_REG:
9049 return TARGET_SSE;
9050
9051 case MM0_REG:
9052 if (TARGET_MACHO || TARGET_64BIT)
9053 return false;
9054 return TARGET_MMX;
9055 }
9056
9057 return false;
9058 }
9059
9060 /* Define how to find the value returned by a function.
9061 VALTYPE is the data type of the value (as a tree).
9062 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9063 otherwise, FUNC is 0. */
9064
9065 static rtx
9066 function_value_32 (machine_mode orig_mode, machine_mode mode,
9067 const_tree fntype, const_tree fn)
9068 {
9069 unsigned int regno;
9070
9071 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9072 we normally prevent this case when mmx is not available. However
9073 some ABIs may require the result to be returned like DImode. */
9074 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9075 regno = FIRST_MMX_REG;
9076
9077 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9078 we prevent this case when sse is not available. However some ABIs
9079 may require the result to be returned like integer TImode. */
9080 else if (mode == TImode
9081 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9082 regno = FIRST_SSE_REG;
9083
9084 /* 32-byte vector modes in %ymm0. */
9085 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9086 regno = FIRST_SSE_REG;
9087
9088 /* 64-byte vector modes in %zmm0. */
9089 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9090 regno = FIRST_SSE_REG;
9091
9092 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9093 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9094 regno = FIRST_FLOAT_REG;
9095 else
9096 /* Most things go in %eax. */
9097 regno = AX_REG;
9098
9099 /* Override FP return register with %xmm0 for local functions when
9100 SSE math is enabled or for functions with sseregparm attribute. */
9101 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9102 {
9103 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9104 if (sse_level == -1)
9105 {
9106 error ("calling %qD with SSE calling convention without "
9107 "SSE/SSE2 enabled", fn);
9108 sorry ("this is a GCC bug that can be worked around by adding "
9109 "attribute used to function called");
9110 }
9111 else if ((sse_level >= 1 && mode == SFmode)
9112 || (sse_level == 2 && mode == DFmode))
9113 regno = FIRST_SSE_REG;
9114 }
9115
9116 /* OImode shouldn't be used directly. */
9117 gcc_assert (mode != OImode);
9118
9119 return gen_rtx_REG (orig_mode, regno);
9120 }
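
/* For illustration, typical 32-bit return locations chosen above, assuming
   default options (TARGET_FLOAT_RETURNS_IN_80387, no sseregparm override):

	int, pointers            -> %eax
	float, double            -> %st(0)
	__m64  (8-byte vector)   -> %mm0  (when such a value gets this far)
	__m128 (16-byte vector)  -> %xmm0  */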
9121
9122 static rtx
9123 function_value_64 (machine_mode orig_mode, machine_mode mode,
9124 const_tree valtype)
9125 {
9126 rtx ret;
9127
9128 /* Handle libcalls, which don't provide a type node. */
9129 if (valtype == NULL)
9130 {
9131 unsigned int regno;
9132
9133 switch (mode)
9134 {
9135 case E_SFmode:
9136 case E_SCmode:
9137 case E_DFmode:
9138 case E_DCmode:
9139 case E_TFmode:
9140 case E_SDmode:
9141 case E_DDmode:
9142 case E_TDmode:
9143 regno = FIRST_SSE_REG;
9144 break;
9145 case E_XFmode:
9146 case E_XCmode:
9147 regno = FIRST_FLOAT_REG;
9148 break;
9149 case E_TCmode:
9150 return NULL;
9151 default:
9152 regno = AX_REG;
9153 }
9154
9155 return gen_rtx_REG (mode, regno);
9156 }
9157 else if (POINTER_TYPE_P (valtype))
9158 {
9159 /* Pointers are always returned in word_mode. */
9160 mode = word_mode;
9161 }
9162
9163 ret = construct_container (mode, orig_mode, valtype, 1,
9164 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9165 x86_64_int_return_registers, 0);
9166
9167 /* For zero-sized structures, construct_container returns NULL, but we
9168 need to keep the rest of the compiler happy by returning a meaningful value. */
9169 if (!ret)
9170 ret = gen_rtx_REG (orig_mode, AX_REG);
9171
9172 return ret;
9173 }
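
/* For illustration, some 64-bit SysV return locations produced above:

	int, pointers                 -> %rax
	double (libcall or by type)   -> %xmm0
	long double (XFmode)          -> %st(0)
	zero-sized struct             -> %rax  (placeholder, see above)

   Aggregates are classified by construct_container and, per the SysV
   classification, may use up to two of %rax/%rdx plus %xmm0/%xmm1.  */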
9174
9175 static rtx
9176 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9177 const_tree valtype)
9178 {
9179 unsigned int regno = AX_REG;
9180
9181 if (TARGET_SSE)
9182 {
9183 switch (GET_MODE_SIZE (mode))
9184 {
9185 case 16:
9186 if (valtype != NULL_TREE
9187 && !VECTOR_INTEGER_TYPE_P (valtype)
9189 && !INTEGRAL_TYPE_P (valtype)
9190 && !VECTOR_FLOAT_TYPE_P (valtype))
9191 break;
9192 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9193 && !COMPLEX_MODE_P (mode))
9194 regno = FIRST_SSE_REG;
9195 break;
9196 case 8:
9197 case 4:
9198 if (mode == SFmode || mode == DFmode)
9199 regno = FIRST_SSE_REG;
9200 break;
9201 default:
9202 break;
9203 }
9204 }
9205 return gen_rtx_REG (orig_mode, regno);
9206 }
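
/* For illustration, return locations under the MS ABI branch above,
   assuming TARGET_SSE:

	int, pointers, 8-byte structs  -> %rax / %eax
	float, double                  -> %xmm0
	__m128                         -> %xmm0  */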
9207
9208 static rtx
9209 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9210 machine_mode orig_mode, machine_mode mode)
9211 {
9212 const_tree fn, fntype;
9213
9214 fn = NULL_TREE;
9215 if (fntype_or_decl && DECL_P (fntype_or_decl))
9216 fn = fntype_or_decl;
9217 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9218
9219 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9220 || POINTER_BOUNDS_MODE_P (mode))
9221 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9222 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9223 return function_value_ms_64 (orig_mode, mode, valtype);
9224 else if (TARGET_64BIT)
9225 return function_value_64 (orig_mode, mode, valtype);
9226 else
9227 return function_value_32 (orig_mode, mode, fntype, fn);
9228 }
9229
9230 static rtx
9231 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9232 {
9233 machine_mode mode, orig_mode;
9234
9235 orig_mode = TYPE_MODE (valtype);
9236 mode = type_natural_mode (valtype, NULL, true);
9237 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9238 }
9239
9240 /* Return an RTX representing a place where a function returns
9241 or receives pointer bounds, or NULL if no bounds are returned.
9242
9243 VALTYPE is a data type of a value returned by the function.
9244
9245 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9246 or FUNCTION_TYPE of the function.
9247
9248 If OUTGOING is false, return a place in which the caller will
9249 see the return value. Otherwise, return a place where a
9250 function returns a value. */
9251
9252 static rtx
9253 ix86_function_value_bounds (const_tree valtype,
9254 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9255 bool outgoing ATTRIBUTE_UNUSED)
9256 {
9257 rtx res = NULL_RTX;
9258
9259 if (BOUNDED_TYPE_P (valtype))
9260 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9261 else if (chkp_type_has_pointer (valtype))
9262 {
9263 bitmap slots;
9264 rtx bounds[2];
9265 bitmap_iterator bi;
9266 unsigned i, bnd_no = 0;
9267
9268 bitmap_obstack_initialize (NULL);
9269 slots = BITMAP_ALLOC (NULL);
9270 chkp_find_bound_slots (valtype, slots);
9271
9272 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9273 {
9274 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9275 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9276 gcc_assert (bnd_no < 2);
9277 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9278 }
9279
9280 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9281
9282 BITMAP_FREE (slots);
9283 bitmap_obstack_release (NULL);
9284 }
9285 else
9286 res = NULL_RTX;
9287
9288 return res;
9289 }
9290
9291 /* Pointer function arguments and return values are promoted to
9292 word_mode for normal functions. */
9293
9294 static machine_mode
9295 ix86_promote_function_mode (const_tree type, machine_mode mode,
9296 int *punsignedp, const_tree fntype,
9297 int for_return)
9298 {
9299 if (cfun->machine->func_type == TYPE_NORMAL
9300 && type != NULL_TREE
9301 && POINTER_TYPE_P (type))
9302 {
9303 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9304 return word_mode;
9305 }
9306 return default_promote_function_mode (type, mode, punsignedp, fntype,
9307 for_return);
9308 }
9309
9310 /* Return true if a structure, union or array with MODE containing FIELD
9311 should be accessed using BLKmode. */
9312
9313 static bool
9314 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9315 {
9316 /* Union with XFmode must be in BLKmode. */
9317 return (mode == XFmode
9318 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9319 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9320 }
9321
9322 rtx
9323 ix86_libcall_value (machine_mode mode)
9324 {
9325 return ix86_function_value_1 (NULL, NULL, mode, mode);
9326 }
9327
9328 /* Return true iff type is returned in memory. */
9329
9330 static bool
9331 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9332 {
9333 #ifdef SUBTARGET_RETURN_IN_MEMORY
9334 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9335 #else
9336 const machine_mode mode = type_natural_mode (type, NULL, true);
9337 HOST_WIDE_INT size;
9338
9339 if (POINTER_BOUNDS_TYPE_P (type))
9340 return false;
9341
9342 if (TARGET_64BIT)
9343 {
9344 if (ix86_function_type_abi (fntype) == MS_ABI)
9345 {
9346 size = int_size_in_bytes (type);
9347
9348 /* __m128 is returned in xmm0. */
9349 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9350 || INTEGRAL_TYPE_P (type)
9351 || VECTOR_FLOAT_TYPE_P (type))
9352 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9353 && !COMPLEX_MODE_P (mode)
9354 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9355 return false;
9356
9357 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
9358 return size != 1 && size != 2 && size != 4 && size != 8;
9359 }
9360 else
9361 {
9362 int needed_intregs, needed_sseregs;
9363
9364 return examine_argument (mode, type, 1,
9365 &needed_intregs, &needed_sseregs);
9366 }
9367 }
9368 else
9369 {
9370 size = int_size_in_bytes (type);
9371
9372 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9373 bytes in registers. */
9374 if (TARGET_IAMCU)
9375 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9376
9377 if (mode == BLKmode)
9378 return true;
9379
9380 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9381 return false;
9382
9383 if (VECTOR_MODE_P (mode) || mode == TImode)
9384 {
9385 /* User-created vectors small enough to fit in EAX. */
9386 if (size < 8)
9387 return false;
9388
9389 /* Unless the ABI prescribes otherwise,
9390 MMX/3dNow values are returned in MM0 if available. */
9391
9392 if (size == 8)
9393 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9394
9395 /* SSE values are returned in XMM0 if available. */
9396 if (size == 16)
9397 return !TARGET_SSE;
9398
9399 /* AVX values are returned in YMM0 if available. */
9400 if (size == 32)
9401 return !TARGET_AVX;
9402
9403 /* AVX512F values are returned in ZMM0 if available. */
9404 if (size == 64)
9405 return !TARGET_AVX512F;
9406 }
9407
9408 if (mode == XFmode)
9409 return false;
9410
9411 if (size > 12)
9412 return true;
9413
9414 /* OImode shouldn't be used directly. */
9415 gcc_assert (mode != OImode);
9416
9417 return false;
9418 }
9419 #endif
9420 }
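
/* For illustration, a few 32-bit decisions made by the default branch above
   (ignoring SUBTARGET_RETURN_IN_MEMORY and the MS/Intel MCU special cases):

	int                       -> in registers
	long double (XFmode)      -> in registers
	16-byte struct (BLKmode)  -> in memory
	__m128                    -> in memory unless TARGET_SSE  */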
9421
9422 \f
9423 /* Create the va_list data type. */
9424
9425 static tree
9426 ix86_build_builtin_va_list_64 (void)
9427 {
9428 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9429
9430 record = lang_hooks.types.make_type (RECORD_TYPE);
9431 type_decl = build_decl (BUILTINS_LOCATION,
9432 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9433
9434 f_gpr = build_decl (BUILTINS_LOCATION,
9435 FIELD_DECL, get_identifier ("gp_offset"),
9436 unsigned_type_node);
9437 f_fpr = build_decl (BUILTINS_LOCATION,
9438 FIELD_DECL, get_identifier ("fp_offset"),
9439 unsigned_type_node);
9440 f_ovf = build_decl (BUILTINS_LOCATION,
9441 FIELD_DECL, get_identifier ("overflow_arg_area"),
9442 ptr_type_node);
9443 f_sav = build_decl (BUILTINS_LOCATION,
9444 FIELD_DECL, get_identifier ("reg_save_area"),
9445 ptr_type_node);
9446
9447 va_list_gpr_counter_field = f_gpr;
9448 va_list_fpr_counter_field = f_fpr;
9449
9450 DECL_FIELD_CONTEXT (f_gpr) = record;
9451 DECL_FIELD_CONTEXT (f_fpr) = record;
9452 DECL_FIELD_CONTEXT (f_ovf) = record;
9453 DECL_FIELD_CONTEXT (f_sav) = record;
9454
9455 TYPE_STUB_DECL (record) = type_decl;
9456 TYPE_NAME (record) = type_decl;
9457 TYPE_FIELDS (record) = f_gpr;
9458 DECL_CHAIN (f_gpr) = f_fpr;
9459 DECL_CHAIN (f_fpr) = f_ovf;
9460 DECL_CHAIN (f_ovf) = f_sav;
9461
9462 layout_type (record);
9463
9464 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
9465 NULL_TREE, TYPE_ATTRIBUTES (record));
9466
9467 /* The correct type is an array type of one element. */
9468 return build_array_type (record, build_index_type (size_zero_node));
9469 }
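
/* For illustration, the record built above corresponds to the familiar
   SysV x86-64 va_list declaration (a C sketch of the layout, not code that
   is emitted anywhere):

	typedef struct __va_list_tag {
	  unsigned int gp_offset;
	  unsigned int fp_offset;
	  void *overflow_arg_area;
	  void *reg_save_area;
	} __va_list_tag;
	typedef __va_list_tag __builtin_va_list[1];  */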
9470
9471 /* Setup the builtin va_list data type and for 64-bit the additional
9472 calling convention specific va_list data types. */
9473
9474 static tree
9475 ix86_build_builtin_va_list (void)
9476 {
9477 if (TARGET_64BIT)
9478 {
9479 /* Initialize ABI specific va_list builtin types.
9480
9481 In lto1, we can encounter two va_list types:
9482 - one as a result of the type-merge across TUs, and
9483 - the one constructed here.
9484 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
9485 a type identity check in canonical_va_list_type based on
9486 TYPE_MAIN_VARIANT (which we used to have) will not work.
9487 Instead, we tag each va_list_type_node with its unique attribute, and
9488 look for the attribute in the type identity check in
9489 canonical_va_list_type.
9490
9491 Tagging sysv_va_list_type_node directly with the attribute is
9492 problematic since it's an array of one record, which will decay into a
9493 pointer to the record when used as a parameter (see build_va_arg comments for
9494 an example), dropping the attribute in the process. So we tag the
9495 record instead. */
9496
9497 /* For SYSV_ABI we use an array of one record. */
9498 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
9499
9500 /* For MS_ABI we use plain pointer to argument area. */
9501 tree char_ptr_type = build_pointer_type (char_type_node);
9502 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
9503 TYPE_ATTRIBUTES (char_ptr_type));
9504 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
9505
9506 return ((ix86_abi == MS_ABI)
9507 ? ms_va_list_type_node
9508 : sysv_va_list_type_node);
9509 }
9510 else
9511 {
9512 /* For i386 we use plain pointer to argument area. */
9513 return build_pointer_type (char_type_node);
9514 }
9515 }
9516
9517 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9518
9519 static void
9520 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9521 {
9522 rtx save_area, mem;
9523 alias_set_type set;
9524 int i, max;
9525
9526 /* GPR size of varargs save area. */
9527 if (cfun->va_list_gpr_size)
9528 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9529 else
9530 ix86_varargs_gpr_size = 0;
9531
9532 /* FPR size of varargs save area. We don't need it if we don't pass
9533 anything in SSE registers. */
9534 if (TARGET_SSE && cfun->va_list_fpr_size)
9535 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9536 else
9537 ix86_varargs_fpr_size = 0;
9538
9539 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9540 return;
9541
9542 save_area = frame_pointer_rtx;
9543 set = get_varargs_alias_set ();
9544
9545 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9546 if (max > X86_64_REGPARM_MAX)
9547 max = X86_64_REGPARM_MAX;
9548
9549 for (i = cum->regno; i < max; i++)
9550 {
9551 mem = gen_rtx_MEM (word_mode,
9552 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9553 MEM_NOTRAP_P (mem) = 1;
9554 set_mem_alias_set (mem, set);
9555 emit_move_insn (mem,
9556 gen_rtx_REG (word_mode,
9557 x86_64_int_parameter_registers[i]));
9558 }
9559
9560 if (ix86_varargs_fpr_size)
9561 {
9562 machine_mode smode;
9563 rtx_code_label *label;
9564 rtx test;
9565
9566 /* Now emit code to save SSE registers. The AX parameter contains the
9567 number of SSE parameter registers used to call this function, though all we
9568 actually check here is the zero/non-zero status. */
9569
9570 label = gen_label_rtx ();
9571 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9572 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9573 label));
9574
9575 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9576 we used movdqa (i.e. TImode) instead? Perhaps even better would
9577 be if we could determine the real mode of the data, via a hook
9578 into pass_stdarg. Ignore all that for now. */
9579 smode = V4SFmode;
9580 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9581 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9582
9583 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9584 if (max > X86_64_SSE_REGPARM_MAX)
9585 max = X86_64_SSE_REGPARM_MAX;
9586
9587 for (i = cum->sse_regno; i < max; ++i)
9588 {
9589 mem = plus_constant (Pmode, save_area,
9590 i * 16 + ix86_varargs_gpr_size);
9591 mem = gen_rtx_MEM (smode, mem);
9592 MEM_NOTRAP_P (mem) = 1;
9593 set_mem_alias_set (mem, set);
9594 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9595
9596 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9597 }
9598
9599 emit_label (label);
9600 }
9601 }
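
/* For illustration, with the default X86_64_REGPARM_MAX of 6 and
   X86_64_SSE_REGPARM_MAX of 8, the register save area built above is
   laid out roughly as:

	bytes   0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9  (8 bytes each)
	bytes  48 .. 175   %xmm0 .. %xmm7  (16 bytes each, only stored
			   when %al is non-zero)  */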
9602
9603 static void
9604 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9605 {
9606 alias_set_type set = get_varargs_alias_set ();
9607 int i;
9608
9609 /* Reset to zero, as a sysv va_arg might have been used
9610 before. */
9611 ix86_varargs_gpr_size = 0;
9612 ix86_varargs_fpr_size = 0;
9613
9614 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9615 {
9616 rtx reg, mem;
9617
9618 mem = gen_rtx_MEM (Pmode,
9619 plus_constant (Pmode, virtual_incoming_args_rtx,
9620 i * UNITS_PER_WORD));
9621 MEM_NOTRAP_P (mem) = 1;
9622 set_mem_alias_set (mem, set);
9623
9624 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9625 emit_move_insn (mem, reg);
9626 }
9627 }
9628
9629 static void
9630 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9631 tree type, int *, int no_rtl)
9632 {
9633 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9634 CUMULATIVE_ARGS next_cum;
9635 tree fntype;
9636
9637 /* This argument doesn't appear to be used anymore, which is good,
9638 because the old code here didn't suppress rtl generation. */
9639 gcc_assert (!no_rtl);
9640
9641 if (!TARGET_64BIT)
9642 return;
9643
9644 fntype = TREE_TYPE (current_function_decl);
9645
9646 /* For varargs, we do not want to skip the dummy va_dcl argument.
9647 For stdargs, we do want to skip the last named argument. */
9648 next_cum = *cum;
9649 if (stdarg_p (fntype))
9650 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9651 true);
9652
9653 if (cum->call_abi == MS_ABI)
9654 setup_incoming_varargs_ms_64 (&next_cum);
9655 else
9656 setup_incoming_varargs_64 (&next_cum);
9657 }
9658
9659 static void
9660 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
9661 machine_mode mode,
9662 tree type,
9663 int *pretend_size ATTRIBUTE_UNUSED,
9664 int no_rtl)
9665 {
9666 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9667 CUMULATIVE_ARGS next_cum;
9668 tree fntype;
9669 rtx save_area;
9670 int bnd_reg, i, max;
9671
9672 gcc_assert (!no_rtl);
9673
9674 /* Do nothing if we use plain pointer to argument area. */
9675 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
9676 return;
9677
9678 fntype = TREE_TYPE (current_function_decl);
9679
9680 /* For varargs, we do not want to skip the dummy va_dcl argument.
9681 For stdargs, we do want to skip the last named argument. */
9682 next_cum = *cum;
9683 if (stdarg_p (fntype))
9684 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9685 true);
9686 save_area = frame_pointer_rtx;
9687
9688 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9689 if (max > X86_64_REGPARM_MAX)
9690 max = X86_64_REGPARM_MAX;
9691
9692 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
9693 if (chkp_function_instrumented_p (current_function_decl))
9694 for (i = cum->regno; i < max; i++)
9695 {
9696 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9697 rtx ptr = gen_rtx_REG (Pmode,
9698 x86_64_int_parameter_registers[i]);
9699 rtx bounds;
9700
9701 if (bnd_reg <= LAST_BND_REG)
9702 bounds = gen_rtx_REG (BNDmode, bnd_reg);
9703 else
9704 {
9705 rtx ldx_addr =
9706 plus_constant (Pmode, arg_pointer_rtx,
9707 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9708 bounds = gen_reg_rtx (BNDmode);
9709 emit_insn (BNDmode == BND64mode
9710 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9711 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9712 }
9713
9714 emit_insn (BNDmode == BND64mode
9715 ? gen_bnd64_stx (addr, ptr, bounds)
9716 : gen_bnd32_stx (addr, ptr, bounds));
9717
9718 bnd_reg++;
9719 }
9720 }
9721
9722
9723 /* Checks if TYPE is of kind va_list char *. */
9724
9725 static bool
9726 is_va_list_char_pointer (tree type)
9727 {
9728 tree canonic;
9729
9730 /* For 32-bit it is always true. */
9731 if (!TARGET_64BIT)
9732 return true;
9733 canonic = ix86_canonical_va_list_type (type);
9734 return (canonic == ms_va_list_type_node
9735 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9736 }
9737
9738 /* Implement va_start. */
9739
9740 static void
9741 ix86_va_start (tree valist, rtx nextarg)
9742 {
9743 HOST_WIDE_INT words, n_gpr, n_fpr;
9744 tree f_gpr, f_fpr, f_ovf, f_sav;
9745 tree gpr, fpr, ovf, sav, t;
9746 tree type;
9747 rtx ovf_rtx;
9748
9749 if (flag_split_stack
9750 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9751 {
9752 unsigned int scratch_regno;
9753
9754 /* When we are splitting the stack, we can't refer to the stack
9755 arguments using internal_arg_pointer, because they may be on
9756 the old stack. The split stack prologue will arrange to
9757 leave a pointer to the old stack arguments in a scratch
9758 register, which we here copy to a pseudo-register. The split
9759 stack prologue can't set the pseudo-register directly because
9760 it (the prologue) runs before any registers have been saved. */
9761
9762 scratch_regno = split_stack_prologue_scratch_regno ();
9763 if (scratch_regno != INVALID_REGNUM)
9764 {
9765 rtx reg;
9766 rtx_insn *seq;
9767
9768 reg = gen_reg_rtx (Pmode);
9769 cfun->machine->split_stack_varargs_pointer = reg;
9770
9771 start_sequence ();
9772 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9773 seq = get_insns ();
9774 end_sequence ();
9775
9776 push_topmost_sequence ();
9777 emit_insn_after (seq, entry_of_function ());
9778 pop_topmost_sequence ();
9779 }
9780 }
9781
9782 /* Only 64bit target needs something special. */
9783 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9784 {
9785 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9786 std_expand_builtin_va_start (valist, nextarg);
9787 else
9788 {
9789 rtx va_r, next;
9790
9791 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9792 next = expand_binop (ptr_mode, add_optab,
9793 cfun->machine->split_stack_varargs_pointer,
9794 crtl->args.arg_offset_rtx,
9795 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9796 convert_move (va_r, next, 0);
9797
9798 /* Store zero bounds for va_list. */
9799 if (chkp_function_instrumented_p (current_function_decl))
9800 chkp_expand_bounds_reset_for_mem (valist,
9801 make_tree (TREE_TYPE (valist),
9802 next));
9803
9804 }
9805 return;
9806 }
9807
9808 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9809 f_fpr = DECL_CHAIN (f_gpr);
9810 f_ovf = DECL_CHAIN (f_fpr);
9811 f_sav = DECL_CHAIN (f_ovf);
9812
9813 valist = build_simple_mem_ref (valist);
9814 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9815 /* The following should be folded into the MEM_REF offset. */
9816 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9817 f_gpr, NULL_TREE);
9818 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9819 f_fpr, NULL_TREE);
9820 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9821 f_ovf, NULL_TREE);
9822 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9823 f_sav, NULL_TREE);
9824
9825 /* Count number of gp and fp argument registers used. */
9826 words = crtl->args.info.words;
9827 n_gpr = crtl->args.info.regno;
9828 n_fpr = crtl->args.info.sse_regno;
9829
9830 if (cfun->va_list_gpr_size)
9831 {
9832 type = TREE_TYPE (gpr);
9833 t = build2 (MODIFY_EXPR, type,
9834 gpr, build_int_cst (type, n_gpr * 8));
9835 TREE_SIDE_EFFECTS (t) = 1;
9836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9837 }
9838
9839 if (TARGET_SSE && cfun->va_list_fpr_size)
9840 {
9841 type = TREE_TYPE (fpr);
9842 t = build2 (MODIFY_EXPR, type, fpr,
9843 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9844 TREE_SIDE_EFFECTS (t) = 1;
9845 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9846 }
9847
9848 /* Find the overflow area. */
9849 type = TREE_TYPE (ovf);
9850 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9851 ovf_rtx = crtl->args.internal_arg_pointer;
9852 else
9853 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9854 t = make_tree (type, ovf_rtx);
9855 if (words != 0)
9856 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9857
9858 /* Store zero bounds for overflow area pointer. */
9859 if (chkp_function_instrumented_p (current_function_decl))
9860 chkp_expand_bounds_reset_for_mem (ovf, t);
9861
9862 t = build2 (MODIFY_EXPR, type, ovf, t);
9863 TREE_SIDE_EFFECTS (t) = 1;
9864 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9865
9866 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9867 {
9868 /* Find the register save area.
9869 The function prologue saves it right above the stack frame. */
9870 type = TREE_TYPE (sav);
9871 t = make_tree (type, frame_pointer_rtx);
9872 if (!ix86_varargs_gpr_size)
9873 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9874
9875 /* Store zero bounds for save area pointer. */
9876 if (chkp_function_instrumented_p (current_function_decl))
9877 chkp_expand_bounds_reset_for_mem (sav, t);
9878
9879 t = build2 (MODIFY_EXPR, type, sav, t);
9880 TREE_SIDE_EFFECTS (t) = 1;
9881 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9882 }
9883 }
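
/* For illustration, for a hypothetical function `void f (int n, ...)'
   (one named integer argument, no named SSE arguments) the code above
   initializes the va_list roughly as:

	gp_offset          = 1 * 8 = 8
	fp_offset          = 48 + 0 * 16 = 48
	overflow_arg_area  = address of the first stack-passed argument
	reg_save_area      = start of the register save area set up by
			     the prologue  */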
9884
9885 /* Implement va_arg. */
9886
9887 static tree
9888 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9889 gimple_seq *post_p)
9890 {
9891 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9892 tree f_gpr, f_fpr, f_ovf, f_sav;
9893 tree gpr, fpr, ovf, sav, t;
9894 int size, rsize;
9895 tree lab_false, lab_over = NULL_TREE;
9896 tree addr, t2;
9897 rtx container;
9898 int indirect_p = 0;
9899 tree ptrtype;
9900 machine_mode nat_mode;
9901 unsigned int arg_boundary;
9902
9903 /* Only 64bit target needs something special. */
9904 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9905 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9906
9907 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9908 f_fpr = DECL_CHAIN (f_gpr);
9909 f_ovf = DECL_CHAIN (f_fpr);
9910 f_sav = DECL_CHAIN (f_ovf);
9911
9912 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9913 valist, f_gpr, NULL_TREE);
9914
9915 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9916 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9917 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9918
9919 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9920 if (indirect_p)
9921 type = build_pointer_type (type);
9922 size = arg_int_size_in_bytes (type);
9923 rsize = CEIL (size, UNITS_PER_WORD);
9924
9925 nat_mode = type_natural_mode (type, NULL, false);
9926 switch (nat_mode)
9927 {
9928 case E_V8SFmode:
9929 case E_V8SImode:
9930 case E_V32QImode:
9931 case E_V16HImode:
9932 case E_V4DFmode:
9933 case E_V4DImode:
9934 case E_V16SFmode:
9935 case E_V16SImode:
9936 case E_V64QImode:
9937 case E_V32HImode:
9938 case E_V8DFmode:
9939 case E_V8DImode:
9940 /* Unnamed 256 and 512bit vector mode parameters are passed on the stack. */
9941 if (!TARGET_64BIT_MS_ABI)
9942 {
9943 container = NULL;
9944 break;
9945 }
9946 /* FALLTHRU */
9947
9948 default:
9949 container = construct_container (nat_mode, TYPE_MODE (type),
9950 type, 0, X86_64_REGPARM_MAX,
9951 X86_64_SSE_REGPARM_MAX, intreg,
9952 0);
9953 break;
9954 }
9955
9956 /* Pull the value out of the saved registers. */
9957
9958 addr = create_tmp_var (ptr_type_node, "addr");
9959
9960 if (container)
9961 {
9962 int needed_intregs, needed_sseregs;
9963 bool need_temp;
9964 tree int_addr, sse_addr;
9965
9966 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9967 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9968
9969 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9970
9971 need_temp = (!REG_P (container)
9972 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9973 || TYPE_ALIGN (type) > 128));
9974
9975 /* In case we are passing a structure, verify that it forms a consecutive
9976 block in the register save area. If not, we need to do moves. */
9977 if (!need_temp && !REG_P (container))
9978 {
9979 /* Verify that all registers are strictly consecutive */
9980 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9981 {
9982 int i;
9983
9984 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9985 {
9986 rtx slot = XVECEXP (container, 0, i);
9987 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9988 || INTVAL (XEXP (slot, 1)) != i * 16)
9989 need_temp = true;
9990 }
9991 }
9992 else
9993 {
9994 int i;
9995
9996 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9997 {
9998 rtx slot = XVECEXP (container, 0, i);
9999 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10000 || INTVAL (XEXP (slot, 1)) != i * 8)
10001 need_temp = true;
10002 }
10003 }
10004 }
10005 if (!need_temp)
10006 {
10007 int_addr = addr;
10008 sse_addr = addr;
10009 }
10010 else
10011 {
10012 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10013 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10014 }
10015
10016 /* First ensure that we fit completely in registers. */
10017 if (needed_intregs)
10018 {
10019 t = build_int_cst (TREE_TYPE (gpr),
10020 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10021 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10022 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10023 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10024 gimplify_and_add (t, pre_p);
10025 }
10026 if (needed_sseregs)
10027 {
10028 t = build_int_cst (TREE_TYPE (fpr),
10029 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10030 + X86_64_REGPARM_MAX * 8);
10031 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10032 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10033 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10034 gimplify_and_add (t, pre_p);
10035 }
10036
10037 /* Compute index to start of area used for integer regs. */
10038 if (needed_intregs)
10039 {
10040 /* int_addr = gpr + sav; */
10041 t = fold_build_pointer_plus (sav, gpr);
10042 gimplify_assign (int_addr, t, pre_p);
10043 }
10044 if (needed_sseregs)
10045 {
10046 /* sse_addr = fpr + sav; */
10047 t = fold_build_pointer_plus (sav, fpr);
10048 gimplify_assign (sse_addr, t, pre_p);
10049 }
10050 if (need_temp)
10051 {
10052 int i, prev_size = 0;
10053 tree temp = create_tmp_var (type, "va_arg_tmp");
10054
10055 /* addr = &temp; */
10056 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10057 gimplify_assign (addr, t, pre_p);
10058
10059 for (i = 0; i < XVECLEN (container, 0); i++)
10060 {
10061 rtx slot = XVECEXP (container, 0, i);
10062 rtx reg = XEXP (slot, 0);
10063 machine_mode mode = GET_MODE (reg);
10064 tree piece_type;
10065 tree addr_type;
10066 tree daddr_type;
10067 tree src_addr, src;
10068 int src_offset;
10069 tree dest_addr, dest;
10070 int cur_size = GET_MODE_SIZE (mode);
10071
10072 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10073 prev_size = INTVAL (XEXP (slot, 1));
10074 if (prev_size + cur_size > size)
10075 {
10076 cur_size = size - prev_size;
10077 unsigned int nbits = cur_size * BITS_PER_UNIT;
10078 if (!int_mode_for_size (nbits, 1).exists (&mode))
10079 mode = QImode;
10080 }
10081 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10082 if (mode == GET_MODE (reg))
10083 addr_type = build_pointer_type (piece_type);
10084 else
10085 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10086 true);
10087 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10088 true);
10089
10090 if (SSE_REGNO_P (REGNO (reg)))
10091 {
10092 src_addr = sse_addr;
10093 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10094 }
10095 else
10096 {
10097 src_addr = int_addr;
10098 src_offset = REGNO (reg) * 8;
10099 }
10100 src_addr = fold_convert (addr_type, src_addr);
10101 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10102
10103 dest_addr = fold_convert (daddr_type, addr);
10104 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10105 if (cur_size == GET_MODE_SIZE (mode))
10106 {
10107 src = build_va_arg_indirect_ref (src_addr);
10108 dest = build_va_arg_indirect_ref (dest_addr);
10109
10110 gimplify_assign (dest, src, pre_p);
10111 }
10112 else
10113 {
10114 tree copy
10115 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10116 3, dest_addr, src_addr,
10117 size_int (cur_size));
10118 gimplify_and_add (copy, pre_p);
10119 }
10120 prev_size += cur_size;
10121 }
10122 }
10123
10124 if (needed_intregs)
10125 {
10126 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10127 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10128 gimplify_assign (gpr, t, pre_p);
10129 }
10130
10131 if (needed_sseregs)
10132 {
10133 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10134 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10135 gimplify_assign (unshare_expr (fpr), t, pre_p);
10136 }
10137
10138 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10139
10140 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10141 }
10142
10143 /* ... otherwise out of the overflow area. */
10144
10145 /* When we align a parameter on the stack for the caller, if the
10146 parameter alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT, it will be
10147 aligned to MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
10148 here with the caller. */
10149 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10150 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10151 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10152
10153 /* Care for on-stack alignment if needed. */
10154 if (arg_boundary <= 64 || size == 0)
10155 t = ovf;
10156 else
10157 {
10158 HOST_WIDE_INT align = arg_boundary / 8;
10159 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10160 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10161 build_int_cst (TREE_TYPE (t), -align));
10162 }
10163
10164 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10165 gimplify_assign (addr, t, pre_p);
10166
10167 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10168 gimplify_assign (unshare_expr (ovf), t, pre_p);
10169
10170 if (container)
10171 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10172
10173 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10174 addr = fold_convert (ptrtype, addr);
10175
10176 if (indirect_p)
10177 addr = build_va_arg_indirect_ref (addr);
10178 return build_va_arg_indirect_ref (addr);
10179 }
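
/* For illustration, for `va_arg (ap, int)' the gimplification above emits
   roughly the following (a C-like pseudo-code sketch, not the exact GIMPLE):

	if (ap->gp_offset >= 48)		/* 48 == (6 - 1 + 1) * 8 */
	  goto lab_false;
	addr = ap->reg_save_area + ap->gp_offset;
	ap->gp_offset += 8;
	goto lab_over;
      lab_false:
	addr = ap->overflow_arg_area;		/* aligned if need be */
	ap->overflow_arg_area = addr + 8;
      lab_over:
	result = *(int *) addr;  */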
10180 \f
10181 /* Return true if OPNUM's MEM should be matched
10182 in movabs* patterns. */
10183
10184 bool
10185 ix86_check_movabs (rtx insn, int opnum)
10186 {
10187 rtx set, mem;
10188
10189 set = PATTERN (insn);
10190 if (GET_CODE (set) == PARALLEL)
10191 set = XVECEXP (set, 0, 0);
10192 gcc_assert (GET_CODE (set) == SET);
10193 mem = XEXP (set, opnum);
10194 while (SUBREG_P (mem))
10195 mem = SUBREG_REG (mem);
10196 gcc_assert (MEM_P (mem));
10197 return volatile_ok || !MEM_VOLATILE_P (mem);
10198 }
10199
10200 /* Return false if INSN contains a MEM with a non-default address space. */
10201 bool
10202 ix86_check_no_addr_space (rtx insn)
10203 {
10204 subrtx_var_iterator::array_type array;
10205 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10206 {
10207 rtx x = *iter;
10208 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10209 return false;
10210 }
10211 return true;
10212 }
10213 \f
10214 /* Initialize the table of extra 80387 mathematical constants. */
10215
10216 static void
10217 init_ext_80387_constants (void)
10218 {
10219 static const char * cst[5] =
10220 {
10221 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10222 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10223 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10224 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10225 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10226 };
10227 int i;
10228
10229 for (i = 0; i < 5; i++)
10230 {
10231 real_from_string (&ext_80387_constants_table[i], cst[i]);
10232 /* Ensure each constant is rounded to XFmode precision. */
10233 real_convert (&ext_80387_constants_table[i],
10234 XFmode, &ext_80387_constants_table[i]);
10235 }
10236
10237 ext_80387_constants_init = 1;
10238 }
10239
10240 /* Return non-zero if the constant is something that
10241 can be loaded with a special instruction. */
10242
10243 int
10244 standard_80387_constant_p (rtx x)
10245 {
10246 machine_mode mode = GET_MODE (x);
10247
10248 const REAL_VALUE_TYPE *r;
10249
10250 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10251 return -1;
10252
10253 if (x == CONST0_RTX (mode))
10254 return 1;
10255 if (x == CONST1_RTX (mode))
10256 return 2;
10257
10258 r = CONST_DOUBLE_REAL_VALUE (x);
10259
10260 /* For XFmode constants, try to find a special 80387 instruction when
10261 optimizing for size or on those CPUs that benefit from them. */
10262 if (mode == XFmode
10263 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10264 {
10265 int i;
10266
10267 if (! ext_80387_constants_init)
10268 init_ext_80387_constants ();
10269
10270 for (i = 0; i < 5; i++)
10271 if (real_identical (r, &ext_80387_constants_table[i]))
10272 return i + 3;
10273 }
10274
10275 /* A load of the constant -0.0 or -1.0 will be split into an
10276 fldz;fchs or fld1;fchs sequence. */
10277 if (real_isnegzero (r))
10278 return 8;
10279 if (real_identical (r, &dconstm1))
10280 return 9;
10281
10282 return 0;
10283 }
10284
10285 /* Return the opcode of the special instruction to be used to load
10286 the constant X. */
10287
10288 const char *
10289 standard_80387_constant_opcode (rtx x)
10290 {
10291 switch (standard_80387_constant_p (x))
10292 {
10293 case 1:
10294 return "fldz";
10295 case 2:
10296 return "fld1";
10297 case 3:
10298 return "fldlg2";
10299 case 4:
10300 return "fldln2";
10301 case 5:
10302 return "fldl2e";
10303 case 6:
10304 return "fldl2t";
10305 case 7:
10306 return "fldpi";
10307 case 8:
10308 case 9:
10309 return "#";
10310 default:
10311 gcc_unreachable ();
10312 }
10313 }
10314
10315 /* Return the CONST_DOUBLE representing the 80387 constant that is
10316 loaded by the specified special instruction. The argument IDX
10317 matches the return value from standard_80387_constant_p. */
10318
10319 rtx
10320 standard_80387_constant_rtx (int idx)
10321 {
10322 int i;
10323
10324 if (! ext_80387_constants_init)
10325 init_ext_80387_constants ();
10326
10327 switch (idx)
10328 {
10329 case 3:
10330 case 4:
10331 case 5:
10332 case 6:
10333 case 7:
10334 i = idx - 3;
10335 break;
10336
10337 default:
10338 gcc_unreachable ();
10339 }
10340
10341 return const_double_from_real_value (ext_80387_constants_table[i],
10342 XFmode);
10343 }
10344
10345 /* Return 1 if X is all zero bits and 2 if X is all one bits
10346 in a supported SSE/AVX vector mode. */
10347
10348 int
10349 standard_sse_constant_p (rtx x, machine_mode pred_mode)
10350 {
10351 machine_mode mode;
10352
10353 if (!TARGET_SSE)
10354 return 0;
10355
10356 mode = GET_MODE (x);
10357
10358 if (x == const0_rtx || const0_operand (x, mode))
10359 return 1;
10360
10361 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10362 {
10363 /* VOIDmode integer constant, get mode from the predicate. */
10364 if (mode == VOIDmode)
10365 mode = pred_mode;
10366
10367 switch (GET_MODE_SIZE (mode))
10368 {
10369 case 64:
10370 if (TARGET_AVX512F)
10371 return 2;
10372 break;
10373 case 32:
10374 if (TARGET_AVX2)
10375 return 2;
10376 break;
10377 case 16:
10378 if (TARGET_SSE2)
10379 return 2;
10380 break;
10381 case 0:
10382 /* VOIDmode */
10383 gcc_unreachable ();
10384 default:
10385 break;
10386 }
10387 }
10388
10389 return 0;
10390 }
10391
10392 /* Return the opcode of the special instruction to be used to load
10393 the constant operands[1] into operands[0]. */
10394
10395 const char *
10396 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
10397 {
10398 machine_mode mode;
10399 rtx x = operands[1];
10400
10401 gcc_assert (TARGET_SSE);
10402
10403 mode = GET_MODE (x);
10404
10405 if (x == const0_rtx || const0_operand (x, mode))
10406 {
10407 switch (get_attr_mode (insn))
10408 {
10409 case MODE_TI:
10410 if (!EXT_REX_SSE_REG_P (operands[0]))
10411 return "%vpxor\t%0, %d0";
10412 /* FALLTHRU */
10413 case MODE_XI:
10414 case MODE_OI:
10415 if (EXT_REX_SSE_REG_P (operands[0]))
10416 return (TARGET_AVX512VL
10417 ? "vpxord\t%x0, %x0, %x0"
10418 : "vpxord\t%g0, %g0, %g0");
10419 return "vpxor\t%x0, %x0, %x0";
10420
10421 case MODE_V2DF:
10422 if (!EXT_REX_SSE_REG_P (operands[0]))
10423 return "%vxorpd\t%0, %d0";
10424 /* FALLTHRU */
10425 case MODE_V8DF:
10426 case MODE_V4DF:
10427 if (!EXT_REX_SSE_REG_P (operands[0]))
10428 return "vxorpd\t%x0, %x0, %x0";
10429 else if (TARGET_AVX512DQ)
10430 return (TARGET_AVX512VL
10431 ? "vxorpd\t%x0, %x0, %x0"
10432 : "vxorpd\t%g0, %g0, %g0");
10433 else
10434 return (TARGET_AVX512VL
10435 ? "vpxorq\t%x0, %x0, %x0"
10436 : "vpxorq\t%g0, %g0, %g0");
10437
10438 case MODE_V4SF:
10439 if (!EXT_REX_SSE_REG_P (operands[0]))
10440 return "%vxorps\t%0, %d0";
10441 /* FALLTHRU */
10442 case MODE_V16SF:
10443 case MODE_V8SF:
10444 if (!EXT_REX_SSE_REG_P (operands[0]))
10445 return "vxorps\t%x0, %x0, %x0";
10446 else if (TARGET_AVX512DQ)
10447 return (TARGET_AVX512VL
10448 ? "vxorps\t%x0, %x0, %x0"
10449 : "vxorps\t%g0, %g0, %g0");
10450 else
10451 return (TARGET_AVX512VL
10452 ? "vpxord\t%x0, %x0, %x0"
10453 : "vpxord\t%g0, %g0, %g0");
10454
10455 default:
10456 gcc_unreachable ();
10457 }
10458 }
10459 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10460 {
10461 enum attr_mode insn_mode = get_attr_mode (insn);
10462
10463 switch (insn_mode)
10464 {
10465 case MODE_XI:
10466 case MODE_V8DF:
10467 case MODE_V16SF:
10468 gcc_assert (TARGET_AVX512F);
10469 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10470
10471 case MODE_OI:
10472 case MODE_V4DF:
10473 case MODE_V8SF:
10474 gcc_assert (TARGET_AVX2);
10475 /* FALLTHRU */
10476 case MODE_TI:
10477 case MODE_V2DF:
10478 case MODE_V4SF:
10479 gcc_assert (TARGET_SSE2);
10480 if (!EXT_REX_SSE_REG_P (operands[0]))
10481 return (TARGET_AVX
10482 ? "vpcmpeqd\t%0, %0, %0"
10483 : "pcmpeqd\t%0, %0");
10484 else if (TARGET_AVX512VL)
10485 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
10486 else
10487 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10488
10489 default:
10490 gcc_unreachable ();
10491 }
10492 }
10493
10494 gcc_unreachable ();
10495 }
10496
10497 /* Returns true if INSN can be transformed from a memory load
10498 to a supported FP constant load. */
10499
10500 bool
10501 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
10502 {
10503 rtx src = find_constant_src (insn);
10504
10505 gcc_assert (REG_P (dst));
10506
10507 if (src == NULL
10508 || (SSE_REGNO_P (REGNO (dst))
10509 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
10510 || (STACK_REGNO_P (REGNO (dst))
10511 && standard_80387_constant_p (src) < 1))
10512 return false;
10513
10514 return true;
10515 }
10516
10517 /* Returns true if OP contains a symbol reference */
10518
10519 bool
10520 symbolic_reference_mentioned_p (rtx op)
10521 {
10522 const char *fmt;
10523 int i;
10524
10525 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10526 return true;
10527
10528 fmt = GET_RTX_FORMAT (GET_CODE (op));
10529 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10530 {
10531 if (fmt[i] == 'E')
10532 {
10533 int j;
10534
10535 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10536 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10537 return true;
10538 }
10539
10540 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10541 return true;
10542 }
10543
10544 return false;
10545 }
10546
10547 /* Return true if it is appropriate to emit `ret' instructions in the
10548 body of a function. Do this only if the epilogue is simple, needing a
10549 couple of insns. Prior to reloading, we can't tell how many registers
10550 must be saved, so return false then. Return false if there is no frame
10551 marker to de-allocate. */
10552
10553 bool
10554 ix86_can_use_return_insn_p (void)
10555 {
10556 if (ix86_function_naked (current_function_decl))
10557 return false;
10558
10559 /* Don't use `ret' instruction in interrupt handler. */
10560 if (! reload_completed
10561 || frame_pointer_needed
10562 || cfun->machine->func_type != TYPE_NORMAL)
10563 return 0;
10564
10565 /* Don't allow more than 32k pop, since that's all we can do
10566 with one instruction. */
10567 if (crtl->args.pops_args && crtl->args.size >= 32768)
10568 return 0;
10569
10570 struct ix86_frame &frame = cfun->machine->frame;
10571 return (frame.stack_pointer_offset == UNITS_PER_WORD
10572 && (frame.nregs + frame.nsseregs) == 0);
10573 }
10574 \f
10575 /* Value should be nonzero if functions must have frame pointers.
10576 Zero means the frame pointer need not be set up (and parms may
10577 be accessed via the stack pointer) in functions that seem suitable. */
10578
10579 static bool
10580 ix86_frame_pointer_required (void)
10581 {
10582 /* If we accessed previous frames, then the generated code expects
10583 to be able to access the saved ebp value in our frame. */
10584 if (cfun->machine->accesses_prev_frame)
10585 return true;
10586
10587 /* Several x86 OSes need a frame pointer for other reasons,
10588 usually pertaining to setjmp. */
10589 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10590 return true;
10591
10592 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
10593 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10594 return true;
10595
10596 /* Win64 SEH: very large frames need a frame pointer, as the maximum
10597 stack allocation is 4GB. */
10598 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10599 return true;
10600
10601 /* SSE saves require a frame pointer when the stack is misaligned. */
10602 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10603 return true;
10604
10605 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10606 turns off the frame pointer by default. Turn it back on now if
10607 we've not got a leaf function. */
10608 if (TARGET_OMIT_LEAF_FRAME_POINTER
10609 && (!crtl->is_leaf
10610 || ix86_current_function_calls_tls_descriptor))
10611 return true;
10612
10613 if (crtl->profile && !flag_fentry)
10614 return true;
10615
10616 return false;
10617 }
10618
10619 /* Record that the current function accesses previous call frames. */
10620
10621 void
10622 ix86_setup_frame_addresses (void)
10623 {
10624 cfun->machine->accesses_prev_frame = 1;
10625 }
10626 \f
10627 #ifndef USE_HIDDEN_LINKONCE
10628 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10629 # define USE_HIDDEN_LINKONCE 1
10630 # else
10631 # define USE_HIDDEN_LINKONCE 0
10632 # endif
10633 #endif
10634
10635 static int pic_labels_used;
10636
10637 /* Fills in the label name that should be used for a pc thunk for
10638 the given register. */
10639
10640 static void
10641 get_pc_thunk_name (char name[32], unsigned int regno)
10642 {
10643 gcc_assert (!TARGET_64BIT);
10644
10645 if (USE_HIDDEN_LINKONCE)
10646 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10647 else
10648 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10649 }
10650
10651
10652 /* This function emits the __x86.get_pc_thunk.* helpers used for -fpic; each
10653 loads its register with the return address of the caller and then returns. */
10654
10655 static void
10656 ix86_code_end (void)
10657 {
10658 rtx xops[2];
10659 int regno;
10660
10661 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
10662 {
10663 char name[32];
10664 tree decl;
10665
10666 if (!(pic_labels_used & (1 << regno)))
10667 continue;
10668
10669 get_pc_thunk_name (name, regno);
10670
10671 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10672 get_identifier (name),
10673 build_function_type_list (void_type_node, NULL_TREE));
10674 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10675 NULL_TREE, void_type_node);
10676 TREE_PUBLIC (decl) = 1;
10677 TREE_STATIC (decl) = 1;
10678 DECL_IGNORED_P (decl) = 1;
10679
10680 #if TARGET_MACHO
10681 if (TARGET_MACHO)
10682 {
10683 switch_to_section (darwin_sections[picbase_thunk_section]);
10684 fputs ("\t.weak_definition\t", asm_out_file);
10685 assemble_name (asm_out_file, name);
10686 fputs ("\n\t.private_extern\t", asm_out_file);
10687 assemble_name (asm_out_file, name);
10688 putc ('\n', asm_out_file);
10689 ASM_OUTPUT_LABEL (asm_out_file, name);
10690 DECL_WEAK (decl) = 1;
10691 }
10692 else
10693 #endif
10694 if (USE_HIDDEN_LINKONCE)
10695 {
10696 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10697
10698 targetm.asm_out.unique_section (decl, 0);
10699 switch_to_section (get_named_section (decl, NULL, 0));
10700
10701 targetm.asm_out.globalize_label (asm_out_file, name);
10702 fputs ("\t.hidden\t", asm_out_file);
10703 assemble_name (asm_out_file, name);
10704 putc ('\n', asm_out_file);
10705 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
10706 }
10707 else
10708 {
10709 switch_to_section (text_section);
10710 ASM_OUTPUT_LABEL (asm_out_file, name);
10711 }
10712
10713 DECL_INITIAL (decl) = make_node (BLOCK);
10714 current_function_decl = decl;
10715 allocate_struct_function (decl, false);
10716 init_function_start (decl);
10717 /* We're about to hide the function body from callees of final_* by
10718 emitting it directly; tell them we're a thunk, if they care. */
10719 cfun->is_thunk = true;
10720 first_function_block_is_cold = false;
10721 /* Make sure unwind info is emitted for the thunk if needed. */
10722 final_start_function (emit_barrier (), asm_out_file, 1);
10723
10724 /* Pad stack IP move with 4 instructions (two NOPs count
10725 as one instruction). */
10726 if (TARGET_PAD_SHORT_FUNCTION)
10727 {
10728 int i = 8;
10729
10730 while (i--)
10731 fputs ("\tnop\n", asm_out_file);
10732 }
10733
10734 xops[0] = gen_rtx_REG (Pmode, regno);
10735 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10736 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
10737 output_asm_insn ("%!ret", NULL);
10738 final_end_function ();
10739 init_insn_lengths ();
10740 free_after_compilation (cfun);
10741 set_cfun (NULL);
10742 current_function_decl = NULL;
10743 }
10744
10745 if (flag_split_stack)
10746 file_end_indicate_split_stack ();
10747 }
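
/* For illustration, the thunk emitted above for %ebx looks like the
   following (AT&T syntax, modulo padding and unwind directives):

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret  */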
10748
10749 /* Emit code for the SET_GOT patterns. */
10750
10751 const char *
10752 output_set_got (rtx dest, rtx label)
10753 {
10754 rtx xops[3];
10755
10756 xops[0] = dest;
10757
10758 if (TARGET_VXWORKS_RTP && flag_pic)
10759 {
10760 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
10761 xops[2] = gen_rtx_MEM (Pmode,
10762 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
10763 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
10764
10765 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
10766 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
10767 an unadorned address. */
10768 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
10769 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
10770 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
10771 return "";
10772 }
10773
10774 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
10775
10776 if (flag_pic)
10777 {
10778 char name[32];
10779 get_pc_thunk_name (name, REGNO (dest));
10780 pic_labels_used |= 1 << REGNO (dest);
10781
10782 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
10783 xops[2] = gen_rtx_MEM (QImode, xops[2]);
10784 output_asm_insn ("%!call\t%X2", xops);
10785
10786 #if TARGET_MACHO
10787 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
10788 This is what will be referenced by the Mach-O PIC subsystem. */
10789 if (machopic_should_output_picbase_label () || !label)
10790 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
10791
10792 /* When we are restoring the pic base at the site of a nonlocal label,
10793 and we decided to emit the pic base above, we will still output a
10794 local label used for calculating the correction offset (even though
10795 the offset will be 0 in that case). */
10796 if (label)
10797 targetm.asm_out.internal_label (asm_out_file, "L",
10798 CODE_LABEL_NUMBER (label));
10799 #endif
10800 }
10801 else
10802 {
10803 if (TARGET_MACHO)
10804 /* We don't need a pic base, we're not producing pic. */
10805 gcc_unreachable ();
10806
10807 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
10808 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
10809 targetm.asm_out.internal_label (asm_out_file, "L",
10810 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
10811 }
10812
10813 if (!TARGET_MACHO)
10814 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10815
10816 return "";
10817 }
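/* Illustrative note (an addition, not original GCC text): with -fpic on a
   typical 32-bit ELF target, the sequence emitted by output_set_got for
   %ebx is roughly

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   i.e. a call to the pc-thunk named by get_pc_thunk_name, followed by the
   "add" of the GOT symbol emitted at the end of the function above.  */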
10818
10819 /* Generate a "push" pattern for input ARG. */
10820
10821 static rtx
10822 gen_push (rtx arg)
10823 {
10824 struct machine_function *m = cfun->machine;
10825
10826 if (m->fs.cfa_reg == stack_pointer_rtx)
10827 m->fs.cfa_offset += UNITS_PER_WORD;
10828 m->fs.sp_offset += UNITS_PER_WORD;
10829
10830 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10831 arg = gen_rtx_REG (word_mode, REGNO (arg));
10832
10833 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10834 gen_rtx_PRE_DEC (Pmode,
10835 stack_pointer_rtx)),
10836 arg);
10837 }
10838
10839 /* Generate a "pop" pattern for input ARG. */
10840
10841 static rtx
10842 gen_pop (rtx arg)
10843 {
10844 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10845 arg = gen_rtx_REG (word_mode, REGNO (arg));
10846
10847 return gen_rtx_SET (arg,
10848 gen_rtx_MEM (word_mode,
10849 gen_rtx_POST_INC (Pmode,
10850 stack_pointer_rtx)));
10851 }
10852
10853 /* Return >= 0 if there is an unused call-clobbered register available
10854 for the entire function. */
10855
10856 static unsigned int
10857 ix86_select_alt_pic_regnum (void)
10858 {
10859 if (ix86_use_pseudo_pic_reg ())
10860 return INVALID_REGNUM;
10861
10862 if (crtl->is_leaf
10863 && !crtl->profile
10864 && !ix86_current_function_calls_tls_descriptor)
10865 {
10866 int i, drap;
10867 /* Can't use the same register for both PIC and DRAP. */
10868 if (crtl->drap_reg)
10869 drap = REGNO (crtl->drap_reg);
10870 else
10871 drap = -1;
10872 for (i = 2; i >= 0; --i)
10873 if (i != drap && !df_regs_ever_live_p (i))
10874 return i;
10875 }
10876
10877 return INVALID_REGNUM;
10878 }
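/* Note added for clarity (not original text): the loop above scans hard
   registers 2, 1 and 0, which in the i386 register numbering are assumed
   here to be %ecx, %edx and %eax, the call-clobbered integer registers;
   any of them that is never live can hold the PIC pointer for a leaf
   function without needing to be saved.  */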
10879
10880 /* Return true if REGNO is used by the epilogue. */
10881
10882 bool
10883 ix86_epilogue_uses (int regno)
10884 {
10885 /* If there are no caller-saved registers, we preserve all registers,
10886 except for MMX and x87 registers which aren't supported when saving
10887 and restoring registers. Don't explicitly save SP register since
10888 it is always preserved. */
10889 return (epilogue_completed
10890 && cfun->machine->no_caller_saved_registers
10891 && !fixed_regs[regno]
10892 && !STACK_REGNO_P (regno)
10893 && !MMX_REGNO_P (regno));
10894 }
10895
10896 /* Return nonzero if register REGNO can be used as a scratch register
10897 in peephole2. */
10898
10899 static bool
10900 ix86_hard_regno_scratch_ok (unsigned int regno)
10901 {
10902 /* If there are no caller-saved registers, we can't use any register
10903 as a scratch register after epilogue and use REGNO as scratch
10904 register only if it has been used before to avoid saving and
10905 restoring it. */
10906 return (!cfun->machine->no_caller_saved_registers
10907 || (!epilogue_completed
10908 && df_regs_ever_live_p (regno)));
10909 }
10910
10911 /* Return true if register class CL should be an additional allocno
10912 class. */
10913
10914 static bool
10915 ix86_additional_allocno_class_p (reg_class_t cl)
10916 {
10917 return cl == MOD4_SSE_REGS;
10918 }
10919
10920 /* Return TRUE if we need to save REGNO. */
10921
10922 static bool
10923 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
10924 {
10925 /* If there are no caller-saved registers, we preserve all registers,
10926 except for MMX and x87 registers which aren't supported when saving
10927 and restoring registers. Don't explicitly save SP register since
10928 it is always preserved. */
10929 if (cfun->machine->no_caller_saved_registers)
10930 {
10931 /* Don't preserve registers used for function return value. */
10932 rtx reg = crtl->return_rtx;
10933 if (reg)
10934 {
10935 unsigned int i = REGNO (reg);
10936 unsigned int nregs = REG_NREGS (reg);
10937 while (nregs-- > 0)
10938 if ((i + nregs) == regno)
10939 return false;
10940
10941 reg = crtl->return_bnd;
10942 if (reg)
10943 {
10944 i = REGNO (reg);
10945 nregs = REG_NREGS (reg);
10946 while (nregs-- > 0)
10947 if ((i + nregs) == regno)
10948 return false;
10949 }
10950 }
10951
10952 return (df_regs_ever_live_p (regno)
10953 && !fixed_regs[regno]
10954 && !STACK_REGNO_P (regno)
10955 && !MMX_REGNO_P (regno)
10956 && (regno != HARD_FRAME_POINTER_REGNUM
10957 || !frame_pointer_needed));
10958 }
10959
10960 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10961 && pic_offset_table_rtx)
10962 {
10963 if (ix86_use_pseudo_pic_reg ())
10964 {
10965 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10966 _mcount in prologue. */
10967 if (!TARGET_64BIT && flag_pic && crtl->profile)
10968 return true;
10969 }
10970 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10971 || crtl->profile
10972 || crtl->calls_eh_return
10973 || crtl->uses_const_pool
10974 || cfun->has_nonlocal_label)
10975 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10976 }
10977
10978 if (crtl->calls_eh_return && maybe_eh_return)
10979 {
10980 unsigned i;
10981 for (i = 0; ; i++)
10982 {
10983 unsigned test = EH_RETURN_DATA_REGNO (i);
10984 if (test == INVALID_REGNUM)
10985 break;
10986 if (test == regno)
10987 return true;
10988 }
10989 }
10990
10991 if (ignore_outlined && cfun->machine->call_ms2sysv)
10992 {
10993 unsigned count = cfun->machine->call_ms2sysv_extra_regs
10994 + xlogue_layout::MIN_REGS;
10995 if (xlogue_layout::is_stub_managed_reg (regno, count))
10996 return false;
10997 }
10998
10999 if (crtl->drap_reg
11000 && regno == REGNO (crtl->drap_reg)
11001 && !cfun->machine->no_drap_save_restore)
11002 return true;
11003
11004 return (df_regs_ever_live_p (regno)
11005 && !call_used_regs[regno]
11006 && !fixed_regs[regno]
11007 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11008 }
11009
11010 /* Return the number of saved general purpose registers. */
11011
11012 static int
11013 ix86_nsaved_regs (void)
11014 {
11015 int nregs = 0;
11016 int regno;
11017
11018 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11019 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11020 nregs ++;
11021 return nregs;
11022 }
11023
11024 /* Return number of saved SSE registers. */
11025
11026 static int
11027 ix86_nsaved_sseregs (void)
11028 {
11029 int nregs = 0;
11030 int regno;
11031
11032 if (!TARGET_64BIT_MS_ABI)
11033 return 0;
11034 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11035 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11036 nregs ++;
11037 return nregs;
11038 }
11039
11040 /* Given FROM and TO register numbers, say whether this elimination is
11041 allowed. If stack alignment is needed, we can only replace argument
11042 pointer with hard frame pointer, or replace frame pointer with stack
11043 pointer. Otherwise, frame pointer elimination is automatically
11044 handled and all other eliminations are valid. */
11045
11046 static bool
11047 ix86_can_eliminate (const int from, const int to)
11048 {
11049 if (stack_realign_fp)
11050 return ((from == ARG_POINTER_REGNUM
11051 && to == HARD_FRAME_POINTER_REGNUM)
11052 || (from == FRAME_POINTER_REGNUM
11053 && to == STACK_POINTER_REGNUM));
11054 else
11055 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11056 }
11057
11058 /* Return the offset between two registers, one to be eliminated, and the other
11059 its replacement, at the start of a routine. */
11060
11061 HOST_WIDE_INT
11062 ix86_initial_elimination_offset (int from, int to)
11063 {
11064 struct ix86_frame &frame = cfun->machine->frame;
11065
11066 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11067 return frame.hard_frame_pointer_offset;
11068 else if (from == FRAME_POINTER_REGNUM
11069 && to == HARD_FRAME_POINTER_REGNUM)
11070 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11071 else
11072 {
11073 gcc_assert (to == STACK_POINTER_REGNUM);
11074
11075 if (from == ARG_POINTER_REGNUM)
11076 return frame.stack_pointer_offset;
11077
11078 gcc_assert (from == FRAME_POINTER_REGNUM);
11079 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11080 }
11081 }
11082
11083 /* In a dynamically-aligned function, we can't know the offset from
11084 stack pointer to frame pointer, so we must ensure that setjmp
11085 eliminates fp against the hard fp (%ebp) rather than trying to
11086 index from %esp up to the top of the frame across a gap that is
11087 of unknown (at compile-time) size. */
11088 static rtx
11089 ix86_builtin_setjmp_frame_value (void)
11090 {
11091 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11092 }
11093
11094 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
11095 static void warn_once_call_ms2sysv_xlogues (const char *feature)
11096 {
11097 static bool warned_once = false;
11098 if (!warned_once)
11099 {
11100 warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
11101 feature);
11102 warned_once = true;
11103 }
11104 }
11105
11106 /* When using -fsplit-stack, the allocation routines set a field in
11107 the TCB to the bottom of the stack plus this much space, measured
11108 in bytes. */
11109
11110 #define SPLIT_STACK_AVAILABLE 256
11111
11112 /* Fill in the ix86_frame structure describing the frame of the current function. */
11113
11114 static void
11115 ix86_compute_frame_layout (void)
11116 {
11117 struct ix86_frame *frame = &cfun->machine->frame;
11118 struct machine_function *m = cfun->machine;
11119 unsigned HOST_WIDE_INT stack_alignment_needed;
11120 HOST_WIDE_INT offset;
11121 unsigned HOST_WIDE_INT preferred_alignment;
11122 HOST_WIDE_INT size = get_frame_size ();
11123 HOST_WIDE_INT to_allocate;
11124
11125 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
11126 * ms_abi functions that call a sysv function. We now need to prune away
11127 * cases where it should be disabled. */
11128 if (TARGET_64BIT && m->call_ms2sysv)
11129 {
11130 gcc_assert (TARGET_64BIT_MS_ABI);
11131 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
11132 gcc_assert (!TARGET_SEH);
11133 gcc_assert (TARGET_SSE);
11134 gcc_assert (!ix86_using_red_zone ());
11135
11136 if (crtl->calls_eh_return)
11137 {
11138 gcc_assert (!reload_completed);
11139 m->call_ms2sysv = false;
11140 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
11141 }
11142
11143 else if (ix86_static_chain_on_stack)
11144 {
11145 gcc_assert (!reload_completed);
11146 m->call_ms2sysv = false;
11147 warn_once_call_ms2sysv_xlogues ("static call chains");
11148 }
11149
11150 /* Finally, compute which registers the stub will manage. */
11151 else
11152 {
11153 unsigned count = xlogue_layout::count_stub_managed_regs ();
11154 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
11155 m->call_ms2sysv_pad_in = 0;
11156 }
11157 }
11158
11159 frame->nregs = ix86_nsaved_regs ();
11160 frame->nsseregs = ix86_nsaved_sseregs ();
11161
11162 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
11163 except for function prologues, leaf functions and when the default
11164 incoming stack boundary is overridden at the command line or via the
11165 force_align_arg_pointer attribute. */
11166 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11167 && (!crtl->is_leaf || cfun->calls_alloca != 0
11168 || ix86_current_function_calls_tls_descriptor
11169 || ix86_incoming_stack_boundary < 128))
11170 {
11171 crtl->preferred_stack_boundary = 128;
11172 crtl->stack_alignment_needed = 128;
11173 }
11174
11175 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11176 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11177
11178 gcc_assert (!size || stack_alignment_needed);
11179 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11180 gcc_assert (preferred_alignment <= stack_alignment_needed);
11181
11182 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
11183 gcc_assert (TARGET_64BIT || !frame->nsseregs);
11184 if (TARGET_64BIT && m->call_ms2sysv)
11185 {
11186 gcc_assert (stack_alignment_needed >= 16);
11187 gcc_assert (!frame->nsseregs);
11188 }
11189
11190 /* For SEH we have to limit the amount of code movement into the prologue.
11191 At present we do this via a BLOCKAGE, at which point there's very little
11192 scheduling that can be done, which means that there's very little point
11193 in doing anything except PUSHs. */
11194 if (TARGET_SEH)
11195 m->use_fast_prologue_epilogue = false;
11196 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
11197 {
11198 int count = frame->nregs;
11199 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11200
11201 /* The fast prologue uses move instead of push to save registers. This
11202 is significantly longer, but also executes faster as modern hardware
11203 can execute the moves in parallel, but can't do that for push/pop.
11204
11205 Be careful about choosing which prologue to emit: when a function takes
11206 many instructions to execute, we may use the slow version, as we also do
11207 when the function is known to be outside a hot spot (this is known only
11208 with profile feedback). Weight the size of the function by the number
11209 of registers to save, as it is cheap to use one or two push instructions
11210 but very slow to use many of them. */
11211 if (count)
11212 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11213 if (node->frequency < NODE_FREQUENCY_NORMAL
11214 || (flag_branch_probabilities
11215 && node->frequency < NODE_FREQUENCY_HOT))
11216 m->use_fast_prologue_epilogue = false;
11217 else
11218 m->use_fast_prologue_epilogue
11219 = !expensive_function_p (count);
11220 }
11221
11222 frame->save_regs_using_mov
11223 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
11224 /* If static stack checking is enabled and done with probes,
11225 the registers need to be saved before allocating the frame. */
11226 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11227
11228 /* Skip return address and error code in exception handler. */
11229 offset = INCOMING_FRAME_SP_OFFSET;
11230
11231 /* Skip pushed static chain. */
11232 if (ix86_static_chain_on_stack)
11233 offset += UNITS_PER_WORD;
11234
11235 /* Skip saved base pointer. */
11236 if (frame_pointer_needed)
11237 offset += UNITS_PER_WORD;
11238 frame->hfp_save_offset = offset;
11239
11240 /* The traditional frame pointer location is at the top of the frame. */
11241 frame->hard_frame_pointer_offset = offset;
11242
11243 /* Register save area */
11244 offset += frame->nregs * UNITS_PER_WORD;
11245 frame->reg_save_offset = offset;
11246
11247 /* On SEH target, registers are pushed just before the frame pointer
11248 location. */
11249 if (TARGET_SEH)
11250 frame->hard_frame_pointer_offset = offset;
11251
11252 /* Calculate the size of the va-arg area (not including padding, if any). */
11253 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11254
11255 if (stack_realign_fp)
11256 {
11257 /* We may need a 16-byte aligned stack for the remainder of the
11258 register save area, but the stack frame for the local function
11259 may require a greater alignment if using AVX/2/512. In order
11260 to avoid wasting space, we first calculate the space needed for
11261 the rest of the register saves, add that to the stack pointer,
11262 and then realign the stack to the boundary of the start of the
11263 frame for the local function. */
11264 HOST_WIDE_INT space_needed = 0;
11265 HOST_WIDE_INT sse_reg_space_needed = 0;
11266
11267 if (TARGET_64BIT)
11268 {
11269 if (m->call_ms2sysv)
11270 {
11271 m->call_ms2sysv_pad_in = 0;
11272 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
11273 }
11274
11275 else if (frame->nsseregs)
11276 /* The only ABI that has saved SSE registers (Win64) also has a
11277 16-byte aligned default stack. However, many programs violate
11278 the ABI, and Wine64 forces stack realignment to compensate. */
11279 space_needed = frame->nsseregs * 16;
11280
11281 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
11282
11283 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
11284 round up to be pedantic. */
11285 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
11286 }
11287 else
11288 space_needed = frame->va_arg_size;
11289
11290 /* Record the allocation size required prior to the realignment AND. */
11291 frame->stack_realign_allocate = space_needed;
11292
11293 /* The re-aligned stack starts at frame->stack_realign_offset. Values
11294 before this point are not directly comparable with values below
11295 this point. Use sp_valid_at to determine if the stack pointer is
11296 valid for a given offset, fp_valid_at for the frame pointer, or
11297 choose_baseaddr to have a base register chosen for you.
11298
11299 Note that the result of (frame->stack_realign_offset
11300 & (stack_alignment_needed - 1)) may not equal zero. */
11301 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
11302 frame->stack_realign_offset = offset - space_needed;
11303 frame->sse_reg_save_offset = frame->stack_realign_offset
11304 + sse_reg_space_needed;
11305 }
11306 else
11307 {
11308 frame->stack_realign_offset = offset;
11309
11310 if (TARGET_64BIT && m->call_ms2sysv)
11311 {
11312 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
11313 offset += xlogue_layout::get_instance ().get_stack_space_used ();
11314 }
11315
11316 /* Align and set SSE register save area. */
11317 else if (frame->nsseregs)
11318 {
11319 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
11320 required and the DRAP re-alignment boundary is at least 16 bytes,
11321 then we want the SSE register save area properly aligned. */
11322 if (ix86_incoming_stack_boundary >= 128
11323 || (stack_realign_drap && stack_alignment_needed >= 16))
11324 offset = ROUND_UP (offset, 16);
11325 offset += frame->nsseregs * 16;
11326 }
11327 frame->sse_reg_save_offset = offset;
11328 offset += frame->va_arg_size;
11329 }
11330
11331 /* Align start of frame for local function. */
11332 if (m->call_ms2sysv
11333 || frame->va_arg_size != 0
11334 || size != 0
11335 || !crtl->is_leaf
11336 || cfun->calls_alloca
11337 || ix86_current_function_calls_tls_descriptor)
11338 offset = ROUND_UP (offset, stack_alignment_needed);
11339
11340 /* Frame pointer points here. */
11341 frame->frame_pointer_offset = offset;
11342
11343 offset += size;
11344
11345 /* Add outgoing arguments area. Can be skipped if we eliminated
11346 all the function calls as dead code.
11347 Skipping is, however, impossible when the function calls alloca: the
11348 alloca expander assumes that the last crtl->outgoing_args_size bytes
11349 of the stack frame are unused. */
11350 if (ACCUMULATE_OUTGOING_ARGS
11351 && (!crtl->is_leaf || cfun->calls_alloca
11352 || ix86_current_function_calls_tls_descriptor))
11353 {
11354 offset += crtl->outgoing_args_size;
11355 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11356 }
11357 else
11358 frame->outgoing_arguments_size = 0;
11359
11360 /* Align stack boundary. Only needed if we're calling another function
11361 or using alloca. */
11362 if (!crtl->is_leaf || cfun->calls_alloca
11363 || ix86_current_function_calls_tls_descriptor)
11364 offset = ROUND_UP (offset, preferred_alignment);
11365
11366 /* We've reached end of stack frame. */
11367 frame->stack_pointer_offset = offset;
11368
11369 /* Size prologue needs to allocate. */
11370 to_allocate = offset - frame->sse_reg_save_offset;
11371
11372 if ((!to_allocate && frame->nregs <= 1)
11373 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)))
11374 frame->save_regs_using_mov = false;
11375
11376 if (ix86_using_red_zone ()
11377 && crtl->sp_is_unchanging
11378 && crtl->is_leaf
11379 && !ix86_pc_thunk_call_expanded
11380 && !ix86_current_function_calls_tls_descriptor)
11381 {
11382 frame->red_zone_size = to_allocate;
11383 if (frame->save_regs_using_mov)
11384 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11385 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11386 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11387 }
11388 else
11389 frame->red_zone_size = 0;
11390 frame->stack_pointer_offset -= frame->red_zone_size;
11391
11392 /* The SEH frame pointer location is near the bottom of the frame.
11393 This is enforced by the fact that the difference between the
11394 stack pointer and the frame pointer is limited to 240 bytes in
11395 the unwind data structure. */
11396 if (TARGET_SEH)
11397 {
11398 HOST_WIDE_INT diff;
11399
11400 /* If we can leave the frame pointer where it is, do so. This also
11401 yields the establisher frame for __builtin_frame_address (0). */
11402 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11403 if (diff <= SEH_MAX_FRAME_SIZE
11404 && (diff > 240 || (diff & 15) != 0)
11405 && !crtl->accesses_prior_frames)
11406 {
11407 /* Ideally we'd determine what portion of the local stack frame
11408 (within the constraint of the lowest 240) is most heavily used.
11409 But without that complication, simply bias the frame pointer
11410 by 128 bytes so as to maximize the amount of the local stack
11411 frame that is addressable with 8-bit offsets. */
11412 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11413 }
11414 }
11415 }
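/* Rough sketch of the layout computed above, from higher to lower
   addresses (an illustrative summary, not original source text):

	incoming return address / error code	<- CFA
	[pushed static chain]
	[saved frame pointer]			<- hfp_save_offset
	GP register save area			<- reg_save_offset
	[realignment padding]
	SSE register save area			<- sse_reg_save_offset
	va_arg register save area
	local variables (get_frame_size ())	<- frame_pointer_offset
	outgoing argument area
						<- stack_pointer_offset

   The offsets are measured from the CFA, so they grow as we move towards
   the bottom of the frame.  */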
11416
11417 /* This is semi-inlined memory_address_length, but simplified
11418 since we know that we're always dealing with reg+offset, and
11419 to avoid having to create and discard all that rtl. */
11420
11421 static inline int
11422 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11423 {
11424 int len = 4;
11425
11426 if (offset == 0)
11427 {
11428 /* EBP and R13 cannot be encoded without an offset. */
11429 len = (regno == BP_REG || regno == R13_REG);
11430 }
11431 else if (IN_RANGE (offset, -128, 127))
11432 len = 1;
11433
11434 /* ESP and R12 must be encoded with a SIB byte. */
11435 if (regno == SP_REG || regno == R12_REG)
11436 len++;
11437
11438 return len;
11439 }
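/* Worked examples for the encoding lengths above (added for illustration):
   "(%eax)" needs no displacement or SIB byte, so the result is 0;
   "(%ebp)" still needs a zero disp8, giving 1;
   "8(%esp)" needs a disp8 plus the mandatory SIB byte, giving 2;
   "1024(%ecx)" needs a 4-byte displacement, giving 4.  */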
11440
11441 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
11442 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11443
11444 static bool
11445 sp_valid_at (HOST_WIDE_INT cfa_offset)
11446 {
11447 const struct machine_frame_state &fs = cfun->machine->fs;
11448 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
11449 {
11450 /* Validate that the cfa_offset isn't in a "no-man's land". */
11451 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
11452 return false;
11453 }
11454 return fs.sp_valid;
11455 }
11456
11457 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
11458 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11459
11460 static inline bool
11461 fp_valid_at (HOST_WIDE_INT cfa_offset)
11462 {
11463 const struct machine_frame_state &fs = cfun->machine->fs;
11464 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
11465 {
11466 /* Validate that the cfa_offset isn't in a "no-man's land". */
11467 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
11468 return false;
11469 }
11470 return fs.fp_valid;
11471 }
11472
11473 /* Choose a base register based upon alignment requested, speed and/or
11474 size. */
11475
11476 static void
11477 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
11478 HOST_WIDE_INT &base_offset,
11479 unsigned int align_reqested, unsigned int *align)
11480 {
11481 const struct machine_function *m = cfun->machine;
11482 unsigned int hfp_align;
11483 unsigned int drap_align;
11484 unsigned int sp_align;
11485 bool hfp_ok = fp_valid_at (cfa_offset);
11486 bool drap_ok = m->fs.drap_valid;
11487 bool sp_ok = sp_valid_at (cfa_offset);
11488
11489 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
11490
11491 /* Filter out any registers that don't meet the requested alignment
11492 criteria. */
11493 if (align_reqested)
11494 {
11495 if (m->fs.realigned)
11496 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
11497 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
11498 notes (which we would need to use a realigned stack pointer),
11499 so disable on SEH targets. */
11500 else if (m->fs.sp_realigned)
11501 sp_align = crtl->stack_alignment_needed;
11502
11503 hfp_ok = hfp_ok && hfp_align >= align_reqested;
11504 drap_ok = drap_ok && drap_align >= align_reqested;
11505 sp_ok = sp_ok && sp_align >= align_reqested;
11506 }
11507
11508 if (m->use_fast_prologue_epilogue)
11509 {
11510 /* Choose the base register most likely to allow the most scheduling
11511 opportunities. Generally FP is valid throughout the function,
11512 while DRAP must be reloaded within the epilogue. But choose either
11513 over the SP due to increased encoding size. */
11514
11515 if (hfp_ok)
11516 {
11517 base_reg = hard_frame_pointer_rtx;
11518 base_offset = m->fs.fp_offset - cfa_offset;
11519 }
11520 else if (drap_ok)
11521 {
11522 base_reg = crtl->drap_reg;
11523 base_offset = 0 - cfa_offset;
11524 }
11525 else if (sp_ok)
11526 {
11527 base_reg = stack_pointer_rtx;
11528 base_offset = m->fs.sp_offset - cfa_offset;
11529 }
11530 }
11531 else
11532 {
11533 HOST_WIDE_INT toffset;
11534 int len = 16, tlen;
11535
11536 /* Choose the base register with the smallest address encoding.
11537 With a tie, choose FP > DRAP > SP. */
11538 if (sp_ok)
11539 {
11540 base_reg = stack_pointer_rtx;
11541 base_offset = m->fs.sp_offset - cfa_offset;
11542 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11543 }
11544 if (drap_ok)
11545 {
11546 toffset = 0 - cfa_offset;
11547 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11548 if (tlen <= len)
11549 {
11550 base_reg = crtl->drap_reg;
11551 base_offset = toffset;
11552 len = tlen;
11553 }
11554 }
11555 if (hfp_ok)
11556 {
11557 toffset = m->fs.fp_offset - cfa_offset;
11558 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11559 if (tlen <= len)
11560 {
11561 base_reg = hard_frame_pointer_rtx;
11562 base_offset = toffset;
11563 len = tlen;
11564 }
11565 }
11566 }
11567
11568 /* Set the align return value. */
11569 if (align)
11570 {
11571 if (base_reg == stack_pointer_rtx)
11572 *align = sp_align;
11573 else if (base_reg == crtl->drap_reg)
11574 *align = drap_align;
11575 else if (base_reg == hard_frame_pointer_rtx)
11576 *align = hfp_align;
11577 }
11578 }
11579
11580 /* Return an RTX that points to CFA_OFFSET within the stack frame and
11581 the alignment of address. If ALIGN is non-null, it should point to
11582 an alignment value (in bits) that is preferred or zero and will
11583 receive the alignment of the base register that was selected,
11584 irrespective of whether or not CFA_OFFSET is a multiple of that
11585 alignment value. If it is possible for the base register offset to be
11586 non-immediate then SCRATCH_REGNO should specify a scratch register to
11587 use.
11588
11589 The valid base registers are taken from CFUN->MACHINE->FS. */
11590
11591 static rtx
11592 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
11593 unsigned int scratch_regno = INVALID_REGNUM)
11594 {
11595 rtx base_reg = NULL;
11596 HOST_WIDE_INT base_offset = 0;
11597
11598 /* If a specific alignment is requested, try to get a base register
11599 with that alignment first. */
11600 if (align && *align)
11601 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
11602
11603 if (!base_reg)
11604 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
11605
11606 gcc_assert (base_reg != NULL);
11607
11608 rtx base_offset_rtx = GEN_INT (base_offset);
11609
11610 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
11611 {
11612 gcc_assert (scratch_regno != INVALID_REGNUM);
11613
11614 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11615 emit_move_insn (scratch_reg, base_offset_rtx);
11616
11617 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
11618 }
11619
11620 return plus_constant (Pmode, base_reg, base_offset);
11621 }
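/* Usage note (added for illustration): a typical call from the register
   save/restore paths below is

	rtx addr = choose_baseaddr (cfa_offset, &align);

   which, when the frame pointer is valid and chosen, yields the
   equivalent of plus_constant (Pmode, hard_frame_pointer_rtx,
   m->fs.fp_offset - cfa_offset), with ALIGN updated to the alignment
   guaranteed by the selected base register.  */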
11622
11623 /* Emit code to save registers in the prologue. */
11624
11625 static void
11626 ix86_emit_save_regs (void)
11627 {
11628 unsigned int regno;
11629 rtx_insn *insn;
11630
11631 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11632 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11633 {
11634 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11635 RTX_FRAME_RELATED_P (insn) = 1;
11636 }
11637 }
11638
11639 /* Emit a single register save at CFA - CFA_OFFSET. */
11640
11641 static void
11642 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11643 HOST_WIDE_INT cfa_offset)
11644 {
11645 struct machine_function *m = cfun->machine;
11646 rtx reg = gen_rtx_REG (mode, regno);
11647 rtx mem, addr, base, insn;
11648 unsigned int align = GET_MODE_ALIGNMENT (mode);
11649
11650 addr = choose_baseaddr (cfa_offset, &align);
11651 mem = gen_frame_mem (mode, addr);
11652
11653 /* The location alignment depends upon the base register. */
11654 align = MIN (GET_MODE_ALIGNMENT (mode), align);
11655 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
11656 set_mem_align (mem, align);
11657
11658 insn = emit_insn (gen_rtx_SET (mem, reg));
11659 RTX_FRAME_RELATED_P (insn) = 1;
11660
11661 base = addr;
11662 if (GET_CODE (base) == PLUS)
11663 base = XEXP (base, 0);
11664 gcc_checking_assert (REG_P (base));
11665
11666 /* When saving registers into a re-aligned local stack frame, avoid
11667 any tricky guessing by dwarf2out. */
11668 if (m->fs.realigned)
11669 {
11670 gcc_checking_assert (stack_realign_drap);
11671
11672 if (regno == REGNO (crtl->drap_reg))
11673 {
11674 /* A bit of a hack. We force the DRAP register to be saved in
11675 the re-aligned stack frame, which provides us with a copy
11676 of the CFA that will last past the prologue. Install it. */
11677 gcc_checking_assert (cfun->machine->fs.fp_valid);
11678 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11679 cfun->machine->fs.fp_offset - cfa_offset);
11680 mem = gen_rtx_MEM (mode, addr);
11681 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11682 }
11683 else
11684 {
11685 /* The frame pointer is a stable reference within the
11686 aligned frame. Use it. */
11687 gcc_checking_assert (cfun->machine->fs.fp_valid);
11688 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11689 cfun->machine->fs.fp_offset - cfa_offset);
11690 mem = gen_rtx_MEM (mode, addr);
11691 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11692 }
11693 }
11694
11695 else if (base == stack_pointer_rtx && m->fs.sp_realigned
11696 && cfa_offset >= m->fs.sp_realigned_offset)
11697 {
11698 gcc_checking_assert (stack_realign_fp);
11699 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11700 }
11701
11702 /* The memory may not be relative to the current CFA register,
11703 which means that we may need to generate a new pattern for
11704 use by the unwind info. */
11705 else if (base != m->fs.cfa_reg)
11706 {
11707 addr = plus_constant (Pmode, m->fs.cfa_reg,
11708 m->fs.cfa_offset - cfa_offset);
11709 mem = gen_rtx_MEM (mode, addr);
11710 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11711 }
11712 }
11713
11714 /* Emit code to save registers using MOV insns.
11715 First register is stored at CFA - CFA_OFFSET. */
11716 static void
11717 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11718 {
11719 unsigned int regno;
11720
11721 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11722 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11723 {
11724 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11725 cfa_offset -= UNITS_PER_WORD;
11726 }
11727 }
11728
11729 /* Emit code to save SSE registers using MOV insns.
11730 First register is stored at CFA - CFA_OFFSET. */
11731 static void
11732 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11733 {
11734 unsigned int regno;
11735
11736 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11737 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11738 {
11739 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11740 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11741 }
11742 }
11743
11744 static GTY(()) rtx queued_cfa_restores;
11745
11746 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
11747 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11748 Don't add the note if the previously saved value will be left untouched
11749 within the stack red-zone until return, as unwinders can find the same value
11750 in the register and on the stack. */
11751
11752 static void
11753 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11754 {
11755 if (!crtl->shrink_wrapped
11756 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11757 return;
11758
11759 if (insn)
11760 {
11761 add_reg_note (insn, REG_CFA_RESTORE, reg);
11762 RTX_FRAME_RELATED_P (insn) = 1;
11763 }
11764 else
11765 queued_cfa_restores
11766 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11767 }
11768
11769 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
11770
11771 static void
11772 ix86_add_queued_cfa_restore_notes (rtx insn)
11773 {
11774 rtx last;
11775 if (!queued_cfa_restores)
11776 return;
11777 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11778 ;
11779 XEXP (last, 1) = REG_NOTES (insn);
11780 REG_NOTES (insn) = queued_cfa_restores;
11781 queued_cfa_restores = NULL_RTX;
11782 RTX_FRAME_RELATED_P (insn) = 1;
11783 }
11784
11785 /* Expand prologue or epilogue stack adjustment.
11786 The pattern exists to put a dependency on all ebp-based memory accesses.
11787 STYLE should be negative if instructions should be marked as frame related,
11788 zero if %r11 register is live and cannot be freely used and positive
11789 otherwise. */
11790
11791 static rtx
11792 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11793 int style, bool set_cfa)
11794 {
11795 struct machine_function *m = cfun->machine;
11796 rtx insn;
11797 bool add_frame_related_expr = false;
11798
11799 if (Pmode == SImode)
11800 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11801 else if (x86_64_immediate_operand (offset, DImode))
11802 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
11803 else
11804 {
11805 rtx tmp;
11806 /* r11 is used by indirect sibcall return as well, set before the
11807 epilogue and used after the epilogue. */
11808 if (style)
11809 tmp = gen_rtx_REG (DImode, R11_REG);
11810 else
11811 {
11812 gcc_assert (src != hard_frame_pointer_rtx
11813 && dest != hard_frame_pointer_rtx);
11814 tmp = hard_frame_pointer_rtx;
11815 }
11816 insn = emit_insn (gen_rtx_SET (tmp, offset));
11817 if (style < 0)
11818 add_frame_related_expr = true;
11819
11820 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11821 }
11822
11823 insn = emit_insn (insn);
11824 if (style >= 0)
11825 ix86_add_queued_cfa_restore_notes (insn);
11826
11827 if (set_cfa)
11828 {
11829 rtx r;
11830
11831 gcc_assert (m->fs.cfa_reg == src);
11832 m->fs.cfa_offset += INTVAL (offset);
11833 m->fs.cfa_reg = dest;
11834
11835 r = gen_rtx_PLUS (Pmode, src, offset);
11836 r = gen_rtx_SET (dest, r);
11837 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11838 RTX_FRAME_RELATED_P (insn) = 1;
11839 }
11840 else if (style < 0)
11841 {
11842 RTX_FRAME_RELATED_P (insn) = 1;
11843 if (add_frame_related_expr)
11844 {
11845 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11846 r = gen_rtx_SET (dest, r);
11847 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
11848 }
11849 }
11850
11851 if (dest == stack_pointer_rtx)
11852 {
11853 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11854 bool valid = m->fs.sp_valid;
11855 bool realigned = m->fs.sp_realigned;
11856
11857 if (src == hard_frame_pointer_rtx)
11858 {
11859 valid = m->fs.fp_valid;
11860 realigned = false;
11861 ooffset = m->fs.fp_offset;
11862 }
11863 else if (src == crtl->drap_reg)
11864 {
11865 valid = m->fs.drap_valid;
11866 realigned = false;
11867 ooffset = 0;
11868 }
11869 else
11870 {
11871 /* Else there are two possibilities: SP itself, which we set
11872 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
11873 taken care of by hand along the eh_return path. */
11874 gcc_checking_assert (src == stack_pointer_rtx
11875 || offset == const0_rtx);
11876 }
11877
11878 m->fs.sp_offset = ooffset - INTVAL (offset);
11879 m->fs.sp_valid = valid;
11880 m->fs.sp_realigned = realigned;
11881 }
11882 return insn;
11883 }
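/* Example use (added for illustration; "allocate" here just stands for
   the number of bytes being allocated): the prologue code below typically
   calls this as

	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-allocate), -1,
				   m->fs.cfa_reg == stack_pointer_rtx);

   i.e. a frame-related stack adjustment (STYLE of -1) that also updates
   the tracked CFA when the stack pointer is the CFA register.  */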
11884
11885 /* Find an available register to be used as the dynamic realign argument
11886 pointer register. Such a register will be written in the prologue and
11887 used at the beginning of the body, so it must not be
11888 1. a parameter passing register.
11889 2. the GOT pointer.
11890 We reuse the static-chain register if it is available. Otherwise, we
11891 use DI for i386 and R13 for x86-64. We chose R13 since it has a
11892 shorter encoding.
11893
11894 Return: the regno of the chosen register. */
11895
11896 static unsigned int
11897 find_drap_reg (void)
11898 {
11899 tree decl = cfun->decl;
11900
11901 /* Always use callee-saved register if there are no caller-saved
11902 registers. */
11903 if (TARGET_64BIT)
11904 {
11905 /* Use R13 for nested functions or functions that need a static chain.
11906 Since a function with a tail call may use any caller-saved
11907 register in the epilogue, DRAP must not use a caller-saved
11908 register in such a case. */
11909 if (DECL_STATIC_CHAIN (decl)
11910 || cfun->machine->no_caller_saved_registers
11911 || crtl->tail_call_emit)
11912 return R13_REG;
11913
11914 return R10_REG;
11915 }
11916 else
11917 {
11918 /* Use DI for nested functions or functions that need a static chain.
11919 Since a function with a tail call may use any caller-saved
11920 register in the epilogue, DRAP must not use a caller-saved
11921 register in such a case. */
11922 if (DECL_STATIC_CHAIN (decl)
11923 || cfun->machine->no_caller_saved_registers
11924 || crtl->tail_call_emit)
11925 return DI_REG;
11926
11927 /* Reuse static chain register if it isn't used for parameter
11928 passing. */
11929 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11930 {
11931 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11932 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11933 return CX_REG;
11934 }
11935 return DI_REG;
11936 }
11937 }
11938
11939 /* Handle a "force_align_arg_pointer" attribute. */
11940
11941 static tree
11942 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11943 tree, int, bool *no_add_attrs)
11944 {
11945 if (TREE_CODE (*node) != FUNCTION_TYPE
11946 && TREE_CODE (*node) != METHOD_TYPE
11947 && TREE_CODE (*node) != FIELD_DECL
11948 && TREE_CODE (*node) != TYPE_DECL)
11949 {
11950 warning (OPT_Wattributes, "%qE attribute only applies to functions",
11951 name);
11952 *no_add_attrs = true;
11953 }
11954
11955 return NULL_TREE;
11956 }
11957
11958 /* Return minimum incoming stack alignment. */
11959
11960 static unsigned int
11961 ix86_minimum_incoming_stack_boundary (bool sibcall)
11962 {
11963 unsigned int incoming_stack_boundary;
11964
11965 /* The stack of an interrupt handler is aligned to 128 bits in 64-bit mode. */
11966 if (cfun->machine->func_type != TYPE_NORMAL)
11967 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
11968 /* Prefer the one specified at command line. */
11969 else if (ix86_user_incoming_stack_boundary)
11970 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11971 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
11972 when -mstackrealign is used, this isn't for a sibcall check, and the
11973 estimated stack alignment is 128 bits. */
11974 else if (!sibcall
11975 && ix86_force_align_arg_pointer
11976 && crtl->stack_alignment_estimated == 128)
11977 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11978 else
11979 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11980
11981 /* Incoming stack alignment can be changed on individual functions
11982 via force_align_arg_pointer attribute. We use the smallest
11983 incoming stack boundary. */
11984 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
11985 && lookup_attribute (ix86_force_align_arg_pointer_string,
11986 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
11987 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11988
11989 /* The incoming stack frame has to be aligned at least at
11990 parm_stack_boundary. */
11991 if (incoming_stack_boundary < crtl->parm_stack_boundary)
11992 incoming_stack_boundary = crtl->parm_stack_boundary;
11993
11994 /* Stack at entrance of main is aligned by runtime. We use the
11995 smallest incoming stack boundary. */
11996 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
11997 && DECL_NAME (current_function_decl)
11998 && MAIN_NAME_P (DECL_NAME (current_function_decl))
11999 && DECL_FILE_SCOPE_P (current_function_decl))
12000 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12001
12002 return incoming_stack_boundary;
12003 }
12004
12005 /* Update incoming stack boundary and estimated stack alignment. */
12006
12007 static void
12008 ix86_update_stack_boundary (void)
12009 {
12010 ix86_incoming_stack_boundary
12011 = ix86_minimum_incoming_stack_boundary (false);
12012
12013 /* x86_64 vararg needs 16byte stack alignment for register save
12014 area. */
12015 if (TARGET_64BIT
12016 && cfun->stdarg
12017 && crtl->stack_alignment_estimated < 128)
12018 crtl->stack_alignment_estimated = 128;
12019
12020 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
12021 if (ix86_tls_descriptor_calls_expanded_in_cfun
12022 && crtl->preferred_stack_boundary < 128)
12023 crtl->preferred_stack_boundary = 128;
12024 }
12025
12026 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12027 needed or an rtx for DRAP otherwise. */
12028
12029 static rtx
12030 ix86_get_drap_rtx (void)
12031 {
12032 /* We must use DRAP if there are outgoing arguments on stack and
12033 ACCUMULATE_OUTGOING_ARGS is false. */
12034 if (ix86_force_drap
12035 || (cfun->machine->outgoing_args_on_stack
12036 && !ACCUMULATE_OUTGOING_ARGS))
12037 crtl->need_drap = true;
12038
12039 if (stack_realign_drap)
12040 {
12041 /* Assign DRAP to vDRAP and return vDRAP. */
12042 unsigned int regno = find_drap_reg ();
12043 rtx drap_vreg;
12044 rtx arg_ptr;
12045 rtx_insn *seq, *insn;
12046
12047 arg_ptr = gen_rtx_REG (Pmode, regno);
12048 crtl->drap_reg = arg_ptr;
12049
12050 start_sequence ();
12051 drap_vreg = copy_to_reg (arg_ptr);
12052 seq = get_insns ();
12053 end_sequence ();
12054
12055 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12056 if (!optimize)
12057 {
12058 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12059 RTX_FRAME_RELATED_P (insn) = 1;
12060 }
12061 return drap_vreg;
12062 }
12063 else
12064 return NULL;
12065 }
12066
12067 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12068
12069 static rtx
12070 ix86_internal_arg_pointer (void)
12071 {
12072 return virtual_incoming_args_rtx;
12073 }
12074
12075 struct scratch_reg {
12076 rtx reg;
12077 bool saved;
12078 };
12079
12080 /* Return a short-lived scratch register for use on function entry.
12081 In 32-bit mode, it is valid only after the registers are saved
12082 in the prologue. This register must be released by means of
12083 release_scratch_register_on_entry once it is dead. */
12084
12085 static void
12086 get_scratch_register_on_entry (struct scratch_reg *sr)
12087 {
12088 int regno;
12089
12090 sr->saved = false;
12091
12092 if (TARGET_64BIT)
12093 {
12094 /* We always use R11 in 64-bit mode. */
12095 regno = R11_REG;
12096 }
12097 else
12098 {
12099 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12100 bool fastcall_p
12101 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12102 bool thiscall_p
12103 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12104 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12105 int regparm = ix86_function_regparm (fntype, decl);
12106 int drap_regno
12107 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12108
12109 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12110 for the static chain register. */
12111 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12112 && drap_regno != AX_REG)
12113 regno = AX_REG;
12114 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12115 for the static chain register. */
12116 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12117 regno = AX_REG;
12118 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12119 regno = DX_REG;
12120 /* ecx is the static chain register. */
12121 else if (regparm < 3 && !fastcall_p && !thiscall_p
12122 && !static_chain_p
12123 && drap_regno != CX_REG)
12124 regno = CX_REG;
12125 else if (ix86_save_reg (BX_REG, true, false))
12126 regno = BX_REG;
12127 /* esi is the static chain register. */
12128 else if (!(regparm == 3 && static_chain_p)
12129 && ix86_save_reg (SI_REG, true, false))
12130 regno = SI_REG;
12131 else if (ix86_save_reg (DI_REG, true, false))
12132 regno = DI_REG;
12133 else
12134 {
12135 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12136 sr->saved = true;
12137 }
12138 }
12139
12140 sr->reg = gen_rtx_REG (Pmode, regno);
12141 if (sr->saved)
12142 {
12143 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12144 RTX_FRAME_RELATED_P (insn) = 1;
12145 }
12146 }
12147
12148 /* Release a scratch register obtained from the preceding function. */
12149
12150 static void
12151 release_scratch_register_on_entry (struct scratch_reg *sr)
12152 {
12153 if (sr->saved)
12154 {
12155 struct machine_function *m = cfun->machine;
12156 rtx x, insn = emit_insn (gen_pop (sr->reg));
12157
12158 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12159 RTX_FRAME_RELATED_P (insn) = 1;
12160 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12161 x = gen_rtx_SET (stack_pointer_rtx, x);
12162 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12163 m->fs.sp_offset -= UNITS_PER_WORD;
12164 }
12165 }
12166
12167 /* Return the probing interval for -fstack-clash-protection. */
12168
12169 static HOST_WIDE_INT
12170 get_probe_interval (void)
12171 {
12172 if (flag_stack_clash_protection)
12173 return (HOST_WIDE_INT_1U
12174 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
12175 else
12176 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
12177 }
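/* For example (illustrative, assuming the usual default of 12 for both
   the --param stack-clash-protection-probe-interval value and
   STACK_CHECK_PROBE_INTERVAL_EXP), the probing interval is
   1 << 12 == 4096 bytes, i.e. one page.  */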
12178
12179 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12180
12181 This differs from the next routine in that it tries hard to prevent
12182 attacks that jump the stack guard. Thus it is never allowed to allocate
12183 more than PROBE_INTERVAL bytes of stack space without a suitable
12184 probe. */
12185
12186 static void
12187 ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size)
12188 {
12189 struct machine_function *m = cfun->machine;
12190
12191 /* If this function does not statically allocate stack space, then
12192 no probes are needed. */
12193 if (!size)
12194 {
12195 /* However, the allocation of space via pushes for register
12196 saves could be viewed as allocating space, but without the
12197 need to probe. */
12198 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
12199 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12200 else
12201 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
12202 return;
12203 }
12204
12205 /* If we are a noreturn function, then we have to consider the
12206 possibility that we're called via a jump rather than a call.
12207
12208 Thus we don't have the implicit probe generated by saving the
12209 return address into the stack at the call. Thus, the stack
12210 pointer could be anywhere in the guard page. The safe thing
12211 to do is emit a probe now.
12212
12213 ?!? This should be revamped to work like aarch64 and s390 where
12214 we track the offset from the most recent probe. Normally that
12215 offset would be zero. For a noreturn function we would reset
12216 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
12217 we just probe when we cross PROBE_INTERVAL. */
12218 if (TREE_THIS_VOLATILE (cfun->decl))
12219 {
12220 /* We can safely use any register here since we're just going to push
12221 its value and immediately pop it back. But we do try and avoid
12222 argument passing registers so as not to introduce dependencies in
12223 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
12224 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
12225 rtx_insn *insn = emit_insn (gen_push (dummy_reg));
12226 RTX_FRAME_RELATED_P (insn) = 1;
12227 ix86_emit_restore_reg_using_pop (dummy_reg);
12228 emit_insn (gen_blockage ());
12229 }
12230
12231 /* If we allocate less than the size of the guard statically,
12232 then no probing is necessary, but we do need to allocate
12233 the stack. */
12234 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
12235 {
12236 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12237 GEN_INT (-size), -1,
12238 m->fs.cfa_reg == stack_pointer_rtx);
12239 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12240 return;
12241 }
12242
12243 /* We're allocating a large enough stack frame that we need to
12244 emit probes. Either emit them inline or in a loop depending
12245 on the size. */
12246 HOST_WIDE_INT probe_interval = get_probe_interval ();
12247 if (size <= 4 * probe_interval)
12248 {
12249 HOST_WIDE_INT i;
12250 for (i = probe_interval; i <= size; i += probe_interval)
12251 {
12252 /* Allocate PROBE_INTERVAL bytes. */
12253 rtx insn
12254 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12255 GEN_INT (-probe_interval), -1,
12256 m->fs.cfa_reg == stack_pointer_rtx);
12257 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
12258
12259 /* And probe at *sp. */
12260 emit_stack_probe (stack_pointer_rtx);
12261 emit_insn (gen_blockage ());
12262 }
12263
12264 /* We need to allocate space for the residual, but we do not need
12265 to probe the residual. */
12266 HOST_WIDE_INT residual = (i - probe_interval - size);
12267 if (residual)
12268 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12269 GEN_INT (residual), -1,
12270 m->fs.cfa_reg == stack_pointer_rtx);
12271 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
12272 }
12273 else
12274 {
12275 struct scratch_reg sr;
12276 get_scratch_register_on_entry (&sr);
12277
12278 /* Step 1: round SIZE down to a multiple of the interval. */
12279 HOST_WIDE_INT rounded_size = size & -probe_interval;
12280
12281 /* Step 2: compute final value of the loop counter. Use lea if
12282 possible. */
12283 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
12284 rtx insn;
12285 if (address_no_seg_operand (addr, Pmode))
12286 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
12287 else
12288 {
12289 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12290 insn = emit_insn (gen_rtx_SET (sr.reg,
12291 gen_rtx_PLUS (Pmode, sr.reg,
12292 stack_pointer_rtx)));
12293 }
12294 if (m->fs.cfa_reg == stack_pointer_rtx)
12295 {
12296 add_reg_note (insn, REG_CFA_DEF_CFA,
12297 plus_constant (Pmode, sr.reg,
12298 m->fs.cfa_offset + rounded_size));
12299 RTX_FRAME_RELATED_P (insn) = 1;
12300 }
12301
12302 /* Step 3: the loop. */
12303 rtx size_rtx = GEN_INT (rounded_size);
12304 insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
12305 size_rtx));
12306 if (m->fs.cfa_reg == stack_pointer_rtx)
12307 {
12308 m->fs.cfa_offset += rounded_size;
12309 add_reg_note (insn, REG_CFA_DEF_CFA,
12310 plus_constant (Pmode, stack_pointer_rtx,
12311 m->fs.cfa_offset));
12312 RTX_FRAME_RELATED_P (insn) = 1;
12313 }
12314 m->fs.sp_offset += rounded_size;
12315 emit_insn (gen_blockage ());
12316
12317 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
12318 is equal to ROUNDED_SIZE. */
12319
12320 if (size != rounded_size)
12321 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12322 GEN_INT (rounded_size - size), -1,
12323 m->fs.cfa_reg == stack_pointer_rtx);
12324 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
12325
12326 release_scratch_register_on_entry (&sr);
12327 }
12328
12329 /* Make sure nothing is scheduled before we are done. */
12330 emit_insn (gen_blockage ());
12331 }
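/* Rough sketch of the code generated above for a large frame (added as an
   illustration; the exact registers depend on get_scratch_register_on_entry
   and the target):

	lea	-ROUNDED_SIZE(%rsp), %r11
   .Lloop:
	sub	$PROBE_INTERVAL, %rsp
	or	$0, (%rsp)
	cmp	%r11, %rsp
	jne	.Lloop
	sub	$(SIZE - ROUNDED_SIZE), %rsp

   so that no more than PROBE_INTERVAL bytes are ever allocated between two
   consecutive probes.  */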
12332
12333 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12334
12335 static void
12336 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12337 {
12338 /* We skip the probe for the first interval + a small dope of 4 words and
12339 probe that many bytes past the specified size to maintain a protection
12340 area at the bottom of the stack. */
12341 const int dope = 4 * UNITS_PER_WORD;
12342 rtx size_rtx = GEN_INT (size), last;
12343
12344 /* See if we have a constant small number of probes to generate. If so,
12345 that's the easy case. The run-time loop is made up of 9 insns in the
12346 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12347 for n # of intervals. */
12348 if (size <= 4 * get_probe_interval ())
12349 {
12350 HOST_WIDE_INT i, adjust;
12351 bool first_probe = true;
12352
12353 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12354 values of N from 1 until it exceeds SIZE. If only one probe is
12355 needed, this will not generate any code. Then adjust and probe
12356 to PROBE_INTERVAL + SIZE. */
12357 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12358 {
12359 if (first_probe)
12360 {
12361 adjust = 2 * get_probe_interval () + dope;
12362 first_probe = false;
12363 }
12364 else
12365 adjust = get_probe_interval ();
12366
12367 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12368 plus_constant (Pmode, stack_pointer_rtx,
12369 -adjust)));
12370 emit_stack_probe (stack_pointer_rtx);
12371 }
12372
12373 if (first_probe)
12374 adjust = size + get_probe_interval () + dope;
12375 else
12376 adjust = size + get_probe_interval () - i;
12377
12378 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12379 plus_constant (Pmode, stack_pointer_rtx,
12380 -adjust)));
12381 emit_stack_probe (stack_pointer_rtx);
12382
12383 /* Adjust back to account for the additional first interval. */
12384 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12385 plus_constant (Pmode, stack_pointer_rtx,
12386 (get_probe_interval ()
12387 + dope))));
12388 }
12389
12390 /* Otherwise, do the same as above, but in a loop. Note that we must be
12391 extra careful with variables wrapping around because we might be at
12392 the very top (or the very bottom) of the address space and we have
12393 to be able to handle this case properly; in particular, we use an
12394 equality test for the loop condition. */
12395 else
12396 {
12397 HOST_WIDE_INT rounded_size;
12398 struct scratch_reg sr;
12399
12400 get_scratch_register_on_entry (&sr);
12401
12402
12403 /* Step 1: round SIZE to the previous multiple of the interval. */
12404
12405 rounded_size = ROUND_DOWN (size, get_probe_interval ());
12406
12407
12408 /* Step 2: compute initial and final value of the loop counter. */
12409
12410 /* SP = SP_0 + PROBE_INTERVAL. */
12411 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12412 plus_constant (Pmode, stack_pointer_rtx,
12413 - (get_probe_interval () + dope))));
12414
12415 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
12416 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12417 emit_insn (gen_rtx_SET (sr.reg,
12418 plus_constant (Pmode, stack_pointer_rtx,
12419 -rounded_size)));
12420 else
12421 {
12422 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12423 emit_insn (gen_rtx_SET (sr.reg,
12424 gen_rtx_PLUS (Pmode, sr.reg,
12425 stack_pointer_rtx)));
12426 }
12427
12428
12429 /* Step 3: the loop
12430
12431 do
12432 {
12433 SP = SP + PROBE_INTERVAL
12434 probe at SP
12435 }
12436 while (SP != LAST_ADDR)
12437
12438 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12439 values of N from 1 until it is equal to ROUNDED_SIZE. */
12440
12441 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12442
12443
12444 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12445 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12446
12447 if (size != rounded_size)
12448 {
12449 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12450 plus_constant (Pmode, stack_pointer_rtx,
12451 rounded_size - size)));
12452 emit_stack_probe (stack_pointer_rtx);
12453 }
12454
12455 /* Adjust back to account for the additional first interval. */
12456 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12457 plus_constant (Pmode, stack_pointer_rtx,
12458 (get_probe_interval ()
12459 + dope))));
12460
12461 release_scratch_register_on_entry (&sr);
12462 }
12463
12464 /* Even if the stack pointer isn't the CFA register, we need to correctly
12465 describe the adjustments made to it, in particular differentiate the
12466 frame-related ones from the frame-unrelated ones. */
12467 if (size > 0)
12468 {
12469 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12470 XVECEXP (expr, 0, 0)
12471 = gen_rtx_SET (stack_pointer_rtx,
12472 plus_constant (Pmode, stack_pointer_rtx, -size));
12473 XVECEXP (expr, 0, 1)
12474 = gen_rtx_SET (stack_pointer_rtx,
12475 plus_constant (Pmode, stack_pointer_rtx,
12476 get_probe_interval () + dope + size));
12477 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12478 RTX_FRAME_RELATED_P (last) = 1;
12479
12480 cfun->machine->fs.sp_offset += size;
12481 }
12482
12483 /* Make sure nothing is scheduled before we are done. */
12484 emit_insn (gen_blockage ());
12485 }
12486
12487 /* Adjust the stack pointer up to REG while probing it. */
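/* The routine below outputs roughly the following loop (AT&T syntax, 64-bit
   target), assuming the default 4096-byte probe interval, %r11 as the
   scratch register REG, and ".LPSRL0" as the generated label:

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%r11, %rsp
		jne	.LPSRL0  */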
12488
12489 const char *
12490 output_adjust_stack_and_probe (rtx reg)
12491 {
12492 static int labelno = 0;
12493 char loop_lab[32];
12494 rtx xops[2];
12495
12496 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12497
12498 /* Loop. */
12499 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12500
12501 /* SP = SP + PROBE_INTERVAL. */
12502 xops[0] = stack_pointer_rtx;
12503 xops[1] = GEN_INT (get_probe_interval ());
12504 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12505
12506 /* Probe at SP. */
12507 xops[1] = const0_rtx;
12508 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12509
12510 /* Test if SP == LAST_ADDR. */
12511 xops[0] = stack_pointer_rtx;
12512 xops[1] = reg;
12513 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12514
12515 /* Branch. */
12516 fputs ("\tjne\t", asm_out_file);
12517 assemble_name_raw (asm_out_file, loop_lab);
12518 fputc ('\n', asm_out_file);
12519
12520 return "";
12521 }
12522
12523 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12524 inclusive. These are offsets from the current stack pointer. */
12525
12526 static void
12527 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12528 {
12529 /* See if we have a constant small number of probes to generate. If so,
12530 that's the easy case. The run-time loop is made up of 6 insns in the
12531 generic case while the compile-time loop is made up of n insns for n #
12532 of intervals. */
12533 if (size <= 6 * get_probe_interval ())
12534 {
12535 HOST_WIDE_INT i;
12536
12537 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12538 it exceeds SIZE. If only one probe is needed, this will not
12539 generate any code. Then probe at FIRST + SIZE. */
12540 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12541 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12542 -(first + i)));
12543
12544 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12545 -(first + size)));
12546 }
12547
12548 /* Otherwise, do the same as above, but in a loop. Note that we must be
12549 extra careful with variables wrapping around because we might be at
12550 the very top (or the very bottom) of the address space and we have
12551 to be able to handle this case properly; in particular, we use an
12552 equality test for the loop condition. */
12553 else
12554 {
12555 HOST_WIDE_INT rounded_size, last;
12556 struct scratch_reg sr;
12557
12558 get_scratch_register_on_entry (&sr);
12559
12560
12561 /* Step 1: round SIZE to the previous multiple of the interval. */
12562
12563 rounded_size = ROUND_DOWN (size, get_probe_interval ());
12564
12565
12566 /* Step 2: compute initial and final value of the loop counter. */
12567
12568 /* TEST_OFFSET = FIRST. */
12569 emit_move_insn (sr.reg, GEN_INT (-first));
12570
12571 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12572 last = first + rounded_size;
12573
12574
12575 /* Step 3: the loop
12576
12577 do
12578 {
12579 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12580 probe at TEST_ADDR
12581 }
12582 while (TEST_ADDR != LAST_ADDR)
12583
12584 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12585 until it is equal to ROUNDED_SIZE. */
12586
12587 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12588
12589
12590 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12591 that SIZE is equal to ROUNDED_SIZE. */
12592
12593 if (size != rounded_size)
12594 emit_stack_probe (plus_constant (Pmode,
12595 gen_rtx_PLUS (Pmode,
12596 stack_pointer_rtx,
12597 sr.reg),
12598 rounded_size - size));
12599
12600 release_scratch_register_on_entry (&sr);
12601 }
12602
12603 /* Make sure nothing is scheduled before we are done. */
12604 emit_insn (gen_blockage ());
12605 }
12606
12607 /* Probe a range of stack addresses from REG to END, inclusive. These are
12608 offsets from the current stack pointer. */
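/* The routine below outputs roughly the following loop (AT&T syntax, 64-bit
   target), assuming the default 4096-byte probe interval, %r11 as REG and an
   illustrative immediate standing in for END:

	.LPSRL1:
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		cmpq	$-65536, %r11
		jne	.LPSRL1  */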
12609
12610 const char *
12611 output_probe_stack_range (rtx reg, rtx end)
12612 {
12613 static int labelno = 0;
12614 char loop_lab[32];
12615 rtx xops[3];
12616
12617 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12618
12619 /* Loop. */
12620 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12621
12622 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12623 xops[0] = reg;
12624 xops[1] = GEN_INT (get_probe_interval ());
12625 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12626
12627 /* Probe at TEST_ADDR. */
12628 xops[0] = stack_pointer_rtx;
12629 xops[1] = reg;
12630 xops[2] = const0_rtx;
12631 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12632
12633 /* Test if TEST_ADDR == LAST_ADDR. */
12634 xops[0] = reg;
12635 xops[1] = end;
12636 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12637
12638 /* Branch. */
12639 fputs ("\tjne\t", asm_out_file);
12640 assemble_name_raw (asm_out_file, loop_lab);
12641 fputc ('\n', asm_out_file);
12642
12643 return "";
12644 }
12645
12646 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
12647 will guide prologue/epilogue to be generated in correct form. */
12648
12649 static void
12650 ix86_finalize_stack_frame_flags (void)
12651 {
12652 /* Check if stack realignment is really needed after reload, and
12653 store the result in cfun. */
12654 unsigned int incoming_stack_boundary
12655 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12656 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12657 unsigned int stack_alignment
12658 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
12659 ? crtl->max_used_stack_slot_alignment
12660 : crtl->stack_alignment_needed);
12661 unsigned int stack_realign
12662 = (incoming_stack_boundary < stack_alignment);
12663 bool recompute_frame_layout_p = false;
12664
12665 if (crtl->stack_realign_finalized)
12666 {
12667 /* After stack_realign_needed is finalized, we can no longer
12668 change it. */
12669 gcc_assert (crtl->stack_realign_needed == stack_realign);
12670 return;
12671 }
12672
12673 /* If the only reason for frame_pointer_needed is that we conservatively
12674 assumed stack realignment might be needed or -fno-omit-frame-pointer
12675 is used, but in the end nothing that needed the stack alignment was
12676 spilled and the stack is not otherwise accessed, clear
12677 frame_pointer_needed and say we don't need stack realignment. */
12678 if ((stack_realign || !flag_omit_frame_pointer)
12679 && frame_pointer_needed
12680 && crtl->is_leaf
12681 && crtl->sp_is_unchanging
12682 && !ix86_current_function_calls_tls_descriptor
12683 && !crtl->accesses_prior_frames
12684 && !cfun->calls_alloca
12685 && !crtl->calls_eh_return
12686 /* See ira_setup_eliminable_regset for the rationale. */
12687 && !(STACK_CHECK_MOVING_SP
12688 && flag_stack_check
12689 && flag_exceptions
12690 && cfun->can_throw_non_call_exceptions)
12691 && !ix86_frame_pointer_required ()
12692 && get_frame_size () == 0
12693 && ix86_nsaved_sseregs () == 0
12694 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12695 {
12696 HARD_REG_SET set_up_by_prologue, prologue_used;
12697 basic_block bb;
12698
12699 CLEAR_HARD_REG_SET (prologue_used);
12700 CLEAR_HARD_REG_SET (set_up_by_prologue);
12701 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12702 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12703 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12704 HARD_FRAME_POINTER_REGNUM);
12705
12706 /* The preferred stack alignment is the minimum stack alignment. */
12707 if (stack_alignment > crtl->preferred_stack_boundary)
12708 stack_alignment = crtl->preferred_stack_boundary;
12709
12710 bool require_stack_frame = false;
12711
12712 FOR_EACH_BB_FN (bb, cfun)
12713 {
12714 rtx_insn *insn;
12715 FOR_BB_INSNS (bb, insn)
12716 if (NONDEBUG_INSN_P (insn)
12717 && requires_stack_frame_p (insn, prologue_used,
12718 set_up_by_prologue))
12719 {
12720 require_stack_frame = true;
12721
12722 if (stack_realign)
12723 {
12724 /* Find the maximum stack alignment. */
12725 subrtx_iterator::array_type array;
12726 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12727 if (MEM_P (*iter)
12728 && (reg_mentioned_p (stack_pointer_rtx,
12729 *iter)
12730 || reg_mentioned_p (frame_pointer_rtx,
12731 *iter)))
12732 {
12733 unsigned int alignment = MEM_ALIGN (*iter);
12734 if (alignment > stack_alignment)
12735 stack_alignment = alignment;
12736 }
12737 }
12738 }
12739 }
12740
12741 if (require_stack_frame)
12742 {
12743 /* A stack frame is required. If the stack alignment needed is less
12744 than the incoming stack boundary, don't realign the stack. */
12745 stack_realign = incoming_stack_boundary < stack_alignment;
12746 if (!stack_realign)
12747 {
12748 crtl->max_used_stack_slot_alignment
12749 = incoming_stack_boundary;
12750 crtl->stack_alignment_needed
12751 = incoming_stack_boundary;
12752 /* Also update preferred_stack_boundary for leaf
12753 functions. */
12754 crtl->preferred_stack_boundary
12755 = incoming_stack_boundary;
12756 }
12757 }
12758 else
12759 {
12760 /* If drap has been set, but it actually isn't live at the
12761 start of the function, there is no reason to set it up. */
12762 if (crtl->drap_reg)
12763 {
12764 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12765 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
12766 REGNO (crtl->drap_reg)))
12767 {
12768 crtl->drap_reg = NULL_RTX;
12769 crtl->need_drap = false;
12770 }
12771 }
12772 else
12773 cfun->machine->no_drap_save_restore = true;
12774
12775 frame_pointer_needed = false;
12776 stack_realign = false;
12777 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12778 crtl->stack_alignment_needed = incoming_stack_boundary;
12779 crtl->stack_alignment_estimated = incoming_stack_boundary;
12780 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12781 crtl->preferred_stack_boundary = incoming_stack_boundary;
12782 df_finish_pass (true);
12783 df_scan_alloc (NULL);
12784 df_scan_blocks ();
12785 df_compute_regs_ever_live (true);
12786 df_analyze ();
12787
12788 if (flag_var_tracking)
12789 {
12790 /* Since frame pointer is no longer available, replace it with
12791 stack pointer - UNITS_PER_WORD in debug insns. */
12792 df_ref ref, next;
12793 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
12794 ref; ref = next)
12795 {
12796 next = DF_REF_NEXT_REG (ref);
12797 if (!DF_REF_INSN_INFO (ref))
12798 continue;
12799
12800 /* Make sure the next ref is for a different instruction,
12801 so that we're not affected by the rescan. */
12802 rtx_insn *insn = DF_REF_INSN (ref);
12803 while (next && DF_REF_INSN (next) == insn)
12804 next = DF_REF_NEXT_REG (next);
12805
12806 if (DEBUG_INSN_P (insn))
12807 {
12808 bool changed = false;
12809 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
12810 {
12811 rtx *loc = DF_REF_LOC (ref);
12812 if (*loc == hard_frame_pointer_rtx)
12813 {
12814 *loc = plus_constant (Pmode,
12815 stack_pointer_rtx,
12816 -UNITS_PER_WORD);
12817 changed = true;
12818 }
12819 }
12820 if (changed)
12821 df_insn_rescan (insn);
12822 }
12823 }
12824 }
12825
12826 recompute_frame_layout_p = true;
12827 }
12828 }
12829
12830 if (crtl->stack_realign_needed != stack_realign)
12831 recompute_frame_layout_p = true;
12832 crtl->stack_realign_needed = stack_realign;
12833 crtl->stack_realign_finalized = true;
12834 if (recompute_frame_layout_p)
12835 ix86_compute_frame_layout ();
12836 }
12837
12838 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12839
12840 static void
12841 ix86_elim_entry_set_got (rtx reg)
12842 {
12843 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12844 rtx_insn *c_insn = BB_HEAD (bb);
12845 if (!NONDEBUG_INSN_P (c_insn))
12846 c_insn = next_nonnote_nondebug_insn (c_insn);
12847 if (c_insn && NONJUMP_INSN_P (c_insn))
12848 {
12849 rtx pat = PATTERN (c_insn);
12850 if (GET_CODE (pat) == PARALLEL)
12851 {
12852 rtx vec = XVECEXP (pat, 0, 0);
12853 if (GET_CODE (vec) == SET
12854 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12855 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12856 delete_insn (c_insn);
12857 }
12858 }
12859 }
12860
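/* Return a SET that stores REG at OFFSET bytes from FRAME_REG when STORE
   is true, or loads REG from that frame slot when STORE is false.  */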
12861 static rtx
12862 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
12863 {
12864 rtx addr, mem;
12865
12866 if (offset)
12867 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
12868 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
12869 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
12870 }
12871
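/* Return a SET that loads REG from OFFSET bytes past FRAME_REG.  */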
12872 static inline rtx
12873 gen_frame_load (rtx reg, rtx frame_reg, int offset)
12874 {
12875 return gen_frame_set (reg, frame_reg, offset, false);
12876 }
12877
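/* Return a SET that stores REG at OFFSET bytes past FRAME_REG.  */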
12878 static inline rtx
12879 gen_frame_store (rtx reg, rtx frame_reg, int offset)
12880 {
12881 return gen_frame_set (reg, frame_reg, offset, true);
12882 }
12883
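/* Emit the insn that saves the MS-to-SysV clobbered registers via the
   out-of-line xlogue save stub: RAX is set up as the stub's base pointer
   and a single frame-related PARALLEL describes the use of the stub plus
   a store of each clobbered register relative to RAX.  */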
12884 static void
12885 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
12886 {
12887 struct machine_function *m = cfun->machine;
12888 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
12889 + m->call_ms2sysv_extra_regs;
12890 rtvec v = rtvec_alloc (ncregs + 1);
12891 unsigned int align, i, vi = 0;
12892 rtx_insn *insn;
12893 rtx sym, addr;
12894 rtx rax = gen_rtx_REG (word_mode, AX_REG);
12895 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
12896
12897 /* AL should only be live with sysv_abi. */
12898 gcc_assert (!ix86_eax_live_at_start_p ());
12899 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
12900
12901 /* Set up RAX as the stub's base pointer. We use stack_realign_offset so
12902 that the address is correct whether we've actually realigned the stack or not. */
12903 align = GET_MODE_ALIGNMENT (V4SFmode);
12904 addr = choose_baseaddr (frame.stack_realign_offset
12905 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
12906 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
12907
12908 emit_insn (gen_rtx_SET (rax, addr));
12909
12910 /* Get the stub symbol. */
12911 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
12912 : XLOGUE_STUB_SAVE);
12913 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
12914
12915 for (i = 0; i < ncregs; ++i)
12916 {
12917 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
12918 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
12919 r.regno);
12920 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
12921 }
12922
12923 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
12924
12925 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
12926 RTX_FRAME_RELATED_P (insn) = true;
12927 }
12928
12929 /* Expand the prologue into a bunch of separate insns. */
12930
12931 void
12932 ix86_expand_prologue (void)
12933 {
12934 struct machine_function *m = cfun->machine;
12935 rtx insn, t;
12936 struct ix86_frame frame;
12937 HOST_WIDE_INT allocate;
12938 bool int_registers_saved;
12939 bool sse_registers_saved;
12940 bool save_stub_call_needed;
12941 rtx static_chain = NULL_RTX;
12942
12943 if (ix86_function_naked (current_function_decl))
12944 return;
12945
12946 ix86_finalize_stack_frame_flags ();
12947
12948 /* DRAP should not coexist with stack_realign_fp */
12949 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12950
12951 memset (&m->fs, 0, sizeof (m->fs));
12952
12953 /* Initialize CFA state for before the prologue. */
12954 m->fs.cfa_reg = stack_pointer_rtx;
12955 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12956
12957 /* Track SP offset to the CFA. We continue tracking this after we've
12958 swapped the CFA register away from SP. In the case of re-alignment
12959 this is fudged; we're interested in offsets within the local frame. */
12960 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12961 m->fs.sp_valid = true;
12962 m->fs.sp_realigned = false;
12963
12964 frame = m->frame;
12965
12966 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12967 {
12968 /* We should have already generated an error for any use of
12969 ms_hook on a nested function. */
12970 gcc_checking_assert (!ix86_static_chain_on_stack);
12971
12972 /* Check if profiling is active and we shall use profiling before
12973 prologue variant. If so sorry. */
12974 if (crtl->profile && flag_fentry != 0)
12975 sorry ("ms_hook_prologue attribute isn%'t compatible "
12976 "with -mfentry for 32-bit");
12977
12978 /* In ix86_asm_output_function_label we emitted:
12979 8b ff movl.s %edi,%edi
12980 55 push %ebp
12981 8b ec movl.s %esp,%ebp
12982
12983 This matches the hookable function prologue in Win32 API
12984 functions in Microsoft Windows XP Service Pack 2 and newer.
12985 Wine uses this to enable Windows apps to hook the Win32 API
12986 functions provided by Wine.
12987
12988 What that means is that we've already set up the frame pointer. */
12989
12990 if (frame_pointer_needed
12991 && !(crtl->drap_reg && crtl->stack_realign_needed))
12992 {
12993 rtx push, mov;
12994
12995 /* We've decided to use the frame pointer already set up.
12996 Describe this to the unwinder by pretending that both
12997 push and mov insns happen right here.
12998
12999 Putting the unwind info here at the end of the ms_hook
13000 is done so that we can make absolutely certain we get
13001 the required byte sequence at the start of the function,
13002 rather than relying on an assembler that can produce
13003 the exact encoding required.
13004
13005 However it does mean (in the unpatched case) that we have
13006 a 1 insn window where the asynchronous unwind info is
13007 incorrect. However, if we placed the unwind info at
13008 its correct location we would have incorrect unwind info
13009 in the patched case. Which is probably all moot since
13010 I don't expect Wine generates dwarf2 unwind info for the
13011 system libraries that use this feature. */
13012
13013 insn = emit_insn (gen_blockage ());
13014
13015 push = gen_push (hard_frame_pointer_rtx);
13016 mov = gen_rtx_SET (hard_frame_pointer_rtx,
13017 stack_pointer_rtx);
13018 RTX_FRAME_RELATED_P (push) = 1;
13019 RTX_FRAME_RELATED_P (mov) = 1;
13020
13021 RTX_FRAME_RELATED_P (insn) = 1;
13022 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13023 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
13024
13025 /* Note that gen_push incremented m->fs.cfa_offset, even
13026 though we didn't emit the push insn here. */
13027 m->fs.cfa_reg = hard_frame_pointer_rtx;
13028 m->fs.fp_offset = m->fs.cfa_offset;
13029 m->fs.fp_valid = true;
13030 }
13031 else
13032 {
13033 /* The frame pointer is not needed so pop %ebp again.
13034 This leaves us with a pristine state. */
13035 emit_insn (gen_pop (hard_frame_pointer_rtx));
13036 }
13037 }
13038
13039 /* The first insn of a function that accepts its static chain on the
13040 stack is to push the register that would be filled in by a direct
13041 call. This insn will be skipped by the trampoline. */
13042 else if (ix86_static_chain_on_stack)
13043 {
13044 static_chain = ix86_static_chain (cfun->decl, false);
13045 insn = emit_insn (gen_push (static_chain));
13046 emit_insn (gen_blockage ());
13047
13048 /* We don't want to interpret this push insn as a register save,
13049 only as a stack adjustment. The real copy of the register as
13050 a save will be done later, if needed. */
13051 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
13052 t = gen_rtx_SET (stack_pointer_rtx, t);
13053 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
13054 RTX_FRAME_RELATED_P (insn) = 1;
13055 }
13056
13057 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
13058 DRAP is needed and stack realignment is really needed after reload. */
13059 if (stack_realign_drap)
13060 {
13061 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13062
13063 /* Can't use DRAP in interrupt function. */
13064 if (cfun->machine->func_type != TYPE_NORMAL)
13065 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
13066 "in interrupt service routine. This may be worked "
13067 "around by avoiding functions with aggregate return.");
13068
13069 /* Only need to push parameter pointer reg if it is caller saved. */
13070 if (!call_used_regs[REGNO (crtl->drap_reg)])
13071 {
13072 /* Push arg pointer reg */
13073 insn = emit_insn (gen_push (crtl->drap_reg));
13074 RTX_FRAME_RELATED_P (insn) = 1;
13075 }
13076
13077 /* Grab the argument pointer. */
13078 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
13079 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13080 RTX_FRAME_RELATED_P (insn) = 1;
13081 m->fs.cfa_reg = crtl->drap_reg;
13082 m->fs.cfa_offset = 0;
13083
13084 /* Align the stack. */
13085 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13086 stack_pointer_rtx,
13087 GEN_INT (-align_bytes)));
13088 RTX_FRAME_RELATED_P (insn) = 1;
13089
13090 /* Replicate the return address on the stack so that return
13091 address can be reached via (argp - 1) slot. This is needed
13092 to implement macro RETURN_ADDR_RTX and intrinsic function
13093 expand_builtin_return_addr etc. */
13094 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
13095 t = gen_frame_mem (word_mode, t);
13096 insn = emit_insn (gen_push (t));
13097 RTX_FRAME_RELATED_P (insn) = 1;
13098
13099 /* For the purposes of frame and register save area addressing,
13100 we've started over with a new frame. */
13101 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13102 m->fs.realigned = true;
13103
13104 if (static_chain)
13105 {
13106 /* Replicate static chain on the stack so that static chain
13107 can be reached via (argp - 2) slot. This is needed for
13108 nested function with stack realignment. */
13109 insn = emit_insn (gen_push (static_chain));
13110 RTX_FRAME_RELATED_P (insn) = 1;
13111 }
13112 }
13113
13114 int_registers_saved = (frame.nregs == 0);
13115 sse_registers_saved = (frame.nsseregs == 0);
13116 save_stub_call_needed = (m->call_ms2sysv);
13117 gcc_assert (sse_registers_saved || !save_stub_call_needed);
13118
13119 if (frame_pointer_needed && !m->fs.fp_valid)
13120 {
13121 /* Note: AT&T enter does NOT have reversed args. Enter is probably
13122 slower on all targets. Also sdb didn't like it. */
13123 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
13124 RTX_FRAME_RELATED_P (insn) = 1;
13125
13126 /* Push registers now, before setting the frame pointer
13127 on SEH target. */
13128 if (!int_registers_saved
13129 && TARGET_SEH
13130 && !frame.save_regs_using_mov)
13131 {
13132 ix86_emit_save_regs ();
13133 int_registers_saved = true;
13134 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13135 }
13136
13137 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
13138 {
13139 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
13140 RTX_FRAME_RELATED_P (insn) = 1;
13141
13142 if (m->fs.cfa_reg == stack_pointer_rtx)
13143 m->fs.cfa_reg = hard_frame_pointer_rtx;
13144 m->fs.fp_offset = m->fs.sp_offset;
13145 m->fs.fp_valid = true;
13146 }
13147 }
13148
13149 if (!int_registers_saved)
13150 {
13151 /* If saving registers via PUSH, do so now. */
13152 if (!frame.save_regs_using_mov)
13153 {
13154 ix86_emit_save_regs ();
13155 int_registers_saved = true;
13156 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13157 }
13158
13159 /* When using red zone we may start register saving before allocating
13160 the stack frame saving one cycle of the prologue. However, avoid
13161 doing this if we have to probe the stack; at least on x86_64 the
13162 stack probe can turn into a call that clobbers a red zone location. */
13163 else if (ix86_using_red_zone ()
13164 && (! TARGET_STACK_PROBE
13165 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
13166 {
13167 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13168 int_registers_saved = true;
13169 }
13170 }
13171
13172 if (stack_realign_fp)
13173 {
13174 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13175 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
13176
13177 /* Record last valid frame pointer offset. */
13178 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
13179
13180 /* The computation of the size of the re-aligned stack frame means
13181 that we must allocate the size of the register save area before
13182 performing the actual alignment. Otherwise we cannot guarantee
13183 that there's enough storage above the realignment point. */
13184 allocate = frame.reg_save_offset - m->fs.sp_offset
13185 + frame.stack_realign_allocate;
13186 if (allocate)
13187 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13188 GEN_INT (-allocate), -1, false);
13189
13190 /* Align the stack. */
13191 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13192 stack_pointer_rtx,
13193 GEN_INT (-align_bytes)));
13194 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
13195 m->fs.sp_realigned_offset = m->fs.sp_offset
13196 - frame.stack_realign_allocate;
13197 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
13198 Beyond this point, stack access should be done via choose_baseaddr or
13199 by using sp_valid_at and fp_valid_at to determine the correct base
13200 register. Henceforth, any CFA offset should be thought of as logical
13201 and not physical. */
13202 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
13203 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
13204 m->fs.sp_realigned = true;
13205
13206 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
13207 is needed to describe where a register is saved using a realigned
13208 stack pointer, so we need to invalidate the stack pointer for that
13209 target. */
13210 if (TARGET_SEH)
13211 m->fs.sp_valid = false;
13212
13213 /* If SP offset is non-immediate after allocation of the stack frame,
13214 then emit SSE saves or stub call prior to allocating the rest of the
13215 stack frame. This is less efficient for the out-of-line stub because
13216 we can't combine allocations across the call barrier, but it's better
13217 than using a scratch register. */
13218 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
13219 - m->fs.sp_realigned_offset),
13220 Pmode))
13221 {
13222 if (!sse_registers_saved)
13223 {
13224 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13225 sse_registers_saved = true;
13226 }
13227 else if (save_stub_call_needed)
13228 {
13229 ix86_emit_outlined_ms2sysv_save (frame);
13230 save_stub_call_needed = false;
13231 }
13232 }
13233 }
13234
13235 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
13236
13237 if (flag_stack_usage_info)
13238 {
13239 /* We start to count from ARG_POINTER. */
13240 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
13241
13242 /* If it was realigned, take into account the fake frame. */
13243 if (stack_realign_drap)
13244 {
13245 if (ix86_static_chain_on_stack)
13246 stack_size += UNITS_PER_WORD;
13247
13248 if (!call_used_regs[REGNO (crtl->drap_reg)])
13249 stack_size += UNITS_PER_WORD;
13250
13251 /* This over-estimates by 1 minimal-stack-alignment-unit but
13252 mitigates that by counting in the new return address slot. */
13253 current_function_dynamic_stack_size
13254 += crtl->stack_alignment_needed / BITS_PER_UNIT;
13255 }
13256
13257 current_function_static_stack_size = stack_size;
13258 }
13259
13260 /* On SEH target with very large frame size, allocate an area to save
13261 SSE registers (as the very large allocation won't be described). */
13262 if (TARGET_SEH
13263 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
13264 && !sse_registers_saved)
13265 {
13266 HOST_WIDE_INT sse_size =
13267 frame.sse_reg_save_offset - frame.reg_save_offset;
13268
13269 gcc_assert (int_registers_saved);
13270
13271 /* No need to do stack checking as the area will be immediately
13272 written. */
13273 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13274 GEN_INT (-sse_size), -1,
13275 m->fs.cfa_reg == stack_pointer_rtx);
13276 allocate -= sse_size;
13277 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13278 sse_registers_saved = true;
13279 }
13280
13281 /* The stack has already been decremented by the instruction calling us
13282 so probe if the size is non-negative to preserve the protection area. */
13283 if (allocate >= 0
13284 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
13285 || flag_stack_clash_protection))
13286 {
13287 /* This assert wants to verify that integer registers were saved
13288 prior to probing. This is necessary when probing may be implemented
13289 as a function call (Windows). It is not necessary for stack clash
13290 protection probing. */
13291 if (!flag_stack_clash_protection)
13292 gcc_assert (int_registers_saved);
13293
13294 if (flag_stack_clash_protection)
13295 {
13296 ix86_adjust_stack_and_probe_stack_clash (allocate);
13297 allocate = 0;
13298 }
13299 else if (STACK_CHECK_MOVING_SP)
13300 {
13301 if (!(crtl->is_leaf && !cfun->calls_alloca
13302 && allocate <= get_probe_interval ()))
13303 {
13304 ix86_adjust_stack_and_probe (allocate);
13305 allocate = 0;
13306 }
13307 }
13308 else
13309 {
13310 HOST_WIDE_INT size = allocate;
13311
13312 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13313 size = 0x80000000 - get_stack_check_protect () - 1;
13314
13315 if (TARGET_STACK_PROBE)
13316 {
13317 if (crtl->is_leaf && !cfun->calls_alloca)
13318 {
13319 if (size > get_probe_interval ())
13320 ix86_emit_probe_stack_range (0, size);
13321 }
13322 else
13323 ix86_emit_probe_stack_range (0,
13324 size + get_stack_check_protect ());
13325 }
13326 else
13327 {
13328 if (crtl->is_leaf && !cfun->calls_alloca)
13329 {
13330 if (size > get_probe_interval ()
13331 && size > get_stack_check_protect ())
13332 ix86_emit_probe_stack_range (get_stack_check_protect (),
13333 size - get_stack_check_protect ());
13334 }
13335 else
13336 ix86_emit_probe_stack_range (get_stack_check_protect (), size);
13337 }
13338 }
13339 }
13340
13341 if (allocate == 0)
13342 ;
13343 else if (!ix86_target_stack_probe ()
13344 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13345 {
13346 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13347 GEN_INT (-allocate), -1,
13348 m->fs.cfa_reg == stack_pointer_rtx);
13349 }
13350 else
13351 {
13352 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13353 rtx r10 = NULL;
13354 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13355 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13356 bool eax_live = ix86_eax_live_at_start_p ();
13357 bool r10_live = false;
13358
13359 if (TARGET_64BIT)
13360 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13361
13362 if (eax_live)
13363 {
13364 insn = emit_insn (gen_push (eax));
13365 allocate -= UNITS_PER_WORD;
13366 /* Note that SEH directives need to continue tracking the stack
13367 pointer even after the frame pointer has been set up. */
13368 if (sp_is_cfa_reg || TARGET_SEH)
13369 {
13370 if (sp_is_cfa_reg)
13371 m->fs.cfa_offset += UNITS_PER_WORD;
13372 RTX_FRAME_RELATED_P (insn) = 1;
13373 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13374 gen_rtx_SET (stack_pointer_rtx,
13375 plus_constant (Pmode, stack_pointer_rtx,
13376 -UNITS_PER_WORD)));
13377 }
13378 }
13379
13380 if (r10_live)
13381 {
13382 r10 = gen_rtx_REG (Pmode, R10_REG);
13383 insn = emit_insn (gen_push (r10));
13384 allocate -= UNITS_PER_WORD;
13385 if (sp_is_cfa_reg || TARGET_SEH)
13386 {
13387 if (sp_is_cfa_reg)
13388 m->fs.cfa_offset += UNITS_PER_WORD;
13389 RTX_FRAME_RELATED_P (insn) = 1;
13390 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13391 gen_rtx_SET (stack_pointer_rtx,
13392 plus_constant (Pmode, stack_pointer_rtx,
13393 -UNITS_PER_WORD)));
13394 }
13395 }
13396
13397 emit_move_insn (eax, GEN_INT (allocate));
13398 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13399
13400 /* Use the fact that AX still contains ALLOCATE. */
13401 adjust_stack_insn = (Pmode == DImode
13402 ? gen_pro_epilogue_adjust_stack_di_sub
13403 : gen_pro_epilogue_adjust_stack_si_sub);
13404
13405 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13406 stack_pointer_rtx, eax));
13407
13408 if (sp_is_cfa_reg || TARGET_SEH)
13409 {
13410 if (sp_is_cfa_reg)
13411 m->fs.cfa_offset += allocate;
13412 RTX_FRAME_RELATED_P (insn) = 1;
13413 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13414 gen_rtx_SET (stack_pointer_rtx,
13415 plus_constant (Pmode, stack_pointer_rtx,
13416 -allocate)));
13417 }
13418 m->fs.sp_offset += allocate;
13419
13420 /* Use stack_pointer_rtx for relative addressing so that code
13421 works for realigned stack, too. */
13422 if (r10_live && eax_live)
13423 {
13424 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13425 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13426 gen_frame_mem (word_mode, t));
13427 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13428 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13429 gen_frame_mem (word_mode, t));
13430 }
13431 else if (eax_live || r10_live)
13432 {
13433 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13434 emit_move_insn (gen_rtx_REG (word_mode,
13435 (eax_live ? AX_REG : R10_REG)),
13436 gen_frame_mem (word_mode, t));
13437 }
13438 }
13439 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13440
13441 /* If we haven't already set up the frame pointer, do so now. */
13442 if (frame_pointer_needed && !m->fs.fp_valid)
13443 {
13444 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13445 GEN_INT (frame.stack_pointer_offset
13446 - frame.hard_frame_pointer_offset));
13447 insn = emit_insn (insn);
13448 RTX_FRAME_RELATED_P (insn) = 1;
13449 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13450
13451 if (m->fs.cfa_reg == stack_pointer_rtx)
13452 m->fs.cfa_reg = hard_frame_pointer_rtx;
13453 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13454 m->fs.fp_valid = true;
13455 }
13456
13457 if (!int_registers_saved)
13458 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13459 if (!sse_registers_saved)
13460 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13461 else if (save_stub_call_needed)
13462 ix86_emit_outlined_ms2sysv_save (frame);
13463
13464 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13465 in PROLOGUE. */
13466 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13467 {
13468 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13469 insn = emit_insn (gen_set_got (pic));
13470 RTX_FRAME_RELATED_P (insn) = 1;
13471 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13472 emit_insn (gen_prologue_use (pic));
13473 /* Delete an already emitted SET_GOT if it exists and is allocated to
13474 REAL_PIC_OFFSET_TABLE_REGNUM. */
13475 ix86_elim_entry_set_got (pic);
13476 }
13477
13478 if (crtl->drap_reg && !crtl->stack_realign_needed)
13479 {
13480 /* vDRAP is set up, but after reload it turns out stack realignment
13481 isn't necessary; here we emit the prologue code to set up DRAP
13482 without the stack realignment adjustment. */
13483 t = choose_baseaddr (0, NULL);
13484 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13485 }
13486
13487 /* Prevent instructions from being scheduled into register save push
13488 sequence when access to the redzone area is done through frame pointer.
13489 The offset between the frame pointer and the stack pointer is calculated
13490 relative to the value of the stack pointer at the end of the function
13491 prologue, and moving instructions that access redzone area via frame
13492 pointer inside push sequence violates this assumption. */
13493 if (frame_pointer_needed && frame.red_zone_size)
13494 emit_insn (gen_memory_blockage ());
13495
13496 /* SEH requires that the prologue end within 256 bytes of the start of
13497 the function. Prevent instruction schedules that would extend that.
13498 Further, prevent alloca modifications to the stack pointer from being
13499 combined with prologue modifications. */
13500 if (TARGET_SEH)
13501 emit_insn (gen_prologue_use (stack_pointer_rtx));
13502 }
13503
13504 /* Emit code to restore REG using a POP insn. */
13505
13506 static void
13507 ix86_emit_restore_reg_using_pop (rtx reg)
13508 {
13509 struct machine_function *m = cfun->machine;
13510 rtx_insn *insn = emit_insn (gen_pop (reg));
13511
13512 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13513 m->fs.sp_offset -= UNITS_PER_WORD;
13514
13515 if (m->fs.cfa_reg == crtl->drap_reg
13516 && REGNO (reg) == REGNO (crtl->drap_reg))
13517 {
13518 /* Previously we'd represented the CFA as an expression
13519 like *(%ebp - 8). We've just popped that value from
13520 the stack, which means we need to reset the CFA to
13521 the drap register. This will remain until we restore
13522 the stack pointer. */
13523 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13524 RTX_FRAME_RELATED_P (insn) = 1;
13525
13526 /* This means that the DRAP register is valid for addressing too. */
13527 m->fs.drap_valid = true;
13528 return;
13529 }
13530
13531 if (m->fs.cfa_reg == stack_pointer_rtx)
13532 {
13533 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13534 x = gen_rtx_SET (stack_pointer_rtx, x);
13535 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13536 RTX_FRAME_RELATED_P (insn) = 1;
13537
13538 m->fs.cfa_offset -= UNITS_PER_WORD;
13539 }
13540
13541 /* When the frame pointer is the CFA, and we pop it, we are
13542 swapping back to the stack pointer as the CFA. This happens
13543 for stack frames that don't allocate other data, so we assume
13544 the stack pointer is now pointing at the return address, i.e.
13545 the function entry state, which makes the offset be 1 word. */
13546 if (reg == hard_frame_pointer_rtx)
13547 {
13548 m->fs.fp_valid = false;
13549 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13550 {
13551 m->fs.cfa_reg = stack_pointer_rtx;
13552 m->fs.cfa_offset -= UNITS_PER_WORD;
13553
13554 add_reg_note (insn, REG_CFA_DEF_CFA,
13555 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13556 GEN_INT (m->fs.cfa_offset)));
13557 RTX_FRAME_RELATED_P (insn) = 1;
13558 }
13559 }
13560 }
13561
13562 /* Emit code to restore saved registers using POP insns. */
13563
13564 static void
13565 ix86_emit_restore_regs_using_pop (void)
13566 {
13567 unsigned int regno;
13568
13569 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13570 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
13571 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13572 }
13573
13574 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
13575 omits the emit and only attaches the notes. */
13576
13577 static void
13578 ix86_emit_leave (rtx_insn *insn)
13579 {
13580 struct machine_function *m = cfun->machine;
13581 if (!insn)
13582 insn = emit_insn (ix86_gen_leave ());
13583
13584 ix86_add_queued_cfa_restore_notes (insn);
13585
13586 gcc_assert (m->fs.fp_valid);
13587 m->fs.sp_valid = true;
13588 m->fs.sp_realigned = false;
13589 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13590 m->fs.fp_valid = false;
13591
13592 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13593 {
13594 m->fs.cfa_reg = stack_pointer_rtx;
13595 m->fs.cfa_offset = m->fs.sp_offset;
13596
13597 add_reg_note (insn, REG_CFA_DEF_CFA,
13598 plus_constant (Pmode, stack_pointer_rtx,
13599 m->fs.sp_offset));
13600 RTX_FRAME_RELATED_P (insn) = 1;
13601 }
13602 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13603 m->fs.fp_offset);
13604 }
13605
13606 /* Emit code to restore saved registers using MOV insns.
13607 First register is restored from CFA - CFA_OFFSET. */
13608 static void
13609 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13610 bool maybe_eh_return)
13611 {
13612 struct machine_function *m = cfun->machine;
13613 unsigned int regno;
13614
13615 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13616 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13617 {
13618 rtx reg = gen_rtx_REG (word_mode, regno);
13619 rtx mem;
13620 rtx_insn *insn;
13621
13622 mem = choose_baseaddr (cfa_offset, NULL);
13623 mem = gen_frame_mem (word_mode, mem);
13624 insn = emit_move_insn (reg, mem);
13625
13626 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13627 {
13628 /* Previously we'd represented the CFA as an expression
13629 like *(%ebp - 8). We've just popped that value from
13630 the stack, which means we need to reset the CFA to
13631 the drap register. This will remain until we restore
13632 the stack pointer. */
13633 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13634 RTX_FRAME_RELATED_P (insn) = 1;
13635
13636 /* This means that the DRAP register is valid for addressing. */
13637 m->fs.drap_valid = true;
13638 }
13639 else
13640 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13641
13642 cfa_offset -= UNITS_PER_WORD;
13643 }
13644 }
13645
13646 /* Emit code to restore saved SSE registers using MOV insns.
13647 First register is restored from CFA - CFA_OFFSET. */
13648 static void
13649 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13650 bool maybe_eh_return)
13651 {
13652 unsigned int regno;
13653
13654 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13655 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13656 {
13657 rtx reg = gen_rtx_REG (V4SFmode, regno);
13658 rtx mem;
13659 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
13660
13661 mem = choose_baseaddr (cfa_offset, &align);
13662 mem = gen_rtx_MEM (V4SFmode, mem);
13663
13664 /* The location alignment depends upon the base register. */
13665 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
13666 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
13667 set_mem_align (mem, align);
13668 emit_insn (gen_rtx_SET (reg, mem));
13669
13670 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13671
13672 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13673 }
13674 }
13675
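/* Restore the MS-to-SysV clobbered registers via the out-of-line xlogue
   restore stub.  RSI is set up as the stub's base pointer.  If USE_CALL,
   the stub is called and the stack is adjusted afterwards; otherwise the
   stub is reached by a tail jump that also restores the stack pointer
   (and the frame pointer when one is in use).  STYLE is passed through
   to the final stack adjustment.  */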
13676 static void
13677 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
13678 bool use_call, int style)
13679 {
13680 struct machine_function *m = cfun->machine;
13681 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13682 + m->call_ms2sysv_extra_regs;
13683 rtvec v;
13684 unsigned int elems_needed, align, i, vi = 0;
13685 rtx_insn *insn;
13686 rtx sym, tmp;
13687 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
13688 rtx r10 = NULL_RTX;
13689 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13690 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
13691 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
13692 rtx rsi_frame_load = NULL_RTX;
13693 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
13694 enum xlogue_stub stub;
13695
13696 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
13697
13698 /* If using a realigned stack, we should never start with padding. */
13699 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
13700
13701 /* Setup RSI as the stub's base pointer. */
13702 align = GET_MODE_ALIGNMENT (V4SFmode);
13703 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
13704 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13705
13706 emit_insn (gen_rtx_SET (rsi, tmp));
13707
13708 /* Get a symbol for the stub. */
13709 if (frame_pointer_needed)
13710 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
13711 : XLOGUE_STUB_RESTORE_HFP_TAIL;
13712 else
13713 stub = use_call ? XLOGUE_STUB_RESTORE
13714 : XLOGUE_STUB_RESTORE_TAIL;
13715 sym = xlogue.get_stub_rtx (stub);
13716
13717 elems_needed = ncregs;
13718 if (use_call)
13719 elems_needed += 1;
13720 else
13721 elems_needed += frame_pointer_needed ? 5 : 3;
13722 v = rtvec_alloc (elems_needed);
13723
13724 /* We call the epilogue stub when we need to pop incoming args or we are
13725 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
13726 epilogue stub and it is the tail-call. */
13727 if (use_call)
13728 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13729 else
13730 {
13731 RTVEC_ELT (v, vi++) = ret_rtx;
13732 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13733 if (frame_pointer_needed)
13734 {
13735 rtx rbp = gen_rtx_REG (DImode, BP_REG);
13736 gcc_assert (m->fs.fp_valid);
13737 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
13738
13739 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
13740 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
13741 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
13742 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
13743 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
13744 }
13745 else
13746 {
13747 /* If no hard frame pointer, we set R10 to the SP restore value. */
13748 gcc_assert (!m->fs.fp_valid);
13749 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13750 gcc_assert (m->fs.sp_valid);
13751
13752 r10 = gen_rtx_REG (DImode, R10_REG);
13753 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
13754 emit_insn (gen_rtx_SET (r10, tmp));
13755
13756 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
13757 }
13758 }
13759
13760 /* Generate frame load insns and restore notes. */
13761 for (i = 0; i < ncregs; ++i)
13762 {
13763 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13764 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
13765 rtx reg, frame_load;
13766
13767 reg = gen_rtx_REG (mode, r.regno);
13768 frame_load = gen_frame_load (reg, rsi, r.offset);
13769
13770 /* Save RSI frame load insn & note to add last. */
13771 if (r.regno == SI_REG)
13772 {
13773 gcc_assert (!rsi_frame_load);
13774 rsi_frame_load = frame_load;
13775 rsi_restore_offset = r.offset;
13776 }
13777 else
13778 {
13779 RTVEC_ELT (v, vi++) = frame_load;
13780 ix86_add_cfa_restore_note (NULL, reg, r.offset);
13781 }
13782 }
13783
13784 /* Add RSI frame load & restore note at the end. */
13785 gcc_assert (rsi_frame_load);
13786 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
13787 RTVEC_ELT (v, vi++) = rsi_frame_load;
13788 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
13789 rsi_restore_offset);
13790
13791 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
13792 if (!use_call && !frame_pointer_needed)
13793 {
13794 gcc_assert (m->fs.sp_valid);
13795 gcc_assert (!m->fs.sp_realigned);
13796
13797 /* At this point, R10 should point to frame.stack_realign_offset. */
13798 if (m->fs.cfa_reg == stack_pointer_rtx)
13799 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
13800 m->fs.sp_offset = frame.stack_realign_offset;
13801 }
13802
13803 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
13804 tmp = gen_rtx_PARALLEL (VOIDmode, v);
13805 if (use_call)
13806 insn = emit_insn (tmp);
13807 else
13808 {
13809 insn = emit_jump_insn (tmp);
13810 JUMP_LABEL (insn) = ret_rtx;
13811
13812 if (frame_pointer_needed)
13813 ix86_emit_leave (insn);
13814 else
13815 {
13816 /* Need CFA adjust note. */
13817 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
13818 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
13819 }
13820 }
13821
13822 RTX_FRAME_RELATED_P (insn) = true;
13823 ix86_add_queued_cfa_restore_notes (insn);
13824
13825 /* If we're not doing a tail-call, we need to adjust the stack. */
13826 if (use_call && m->fs.sp_valid)
13827 {
13828 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
13829 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13830 GEN_INT (dealloc), style,
13831 m->fs.cfa_reg == stack_pointer_rtx);
13832 }
13833 }
13834
13835 /* Restore function stack, frame, and registers. */
13836
13837 void
13838 ix86_expand_epilogue (int style)
13839 {
13840 struct machine_function *m = cfun->machine;
13841 struct machine_frame_state frame_state_save = m->fs;
13842 struct ix86_frame frame;
13843 bool restore_regs_via_mov;
13844 bool using_drap;
13845 bool restore_stub_is_tail = false;
13846
13847 if (ix86_function_naked (current_function_decl))
13848 {
13849 /* The program should not reach this point. */
13850 emit_insn (gen_ud2 ());
13851 return;
13852 }
13853
13854 ix86_finalize_stack_frame_flags ();
13855 frame = m->frame;
13856
13857 m->fs.sp_realigned = stack_realign_fp;
13858 m->fs.sp_valid = stack_realign_fp
13859 || !frame_pointer_needed
13860 || crtl->sp_is_unchanging;
13861 gcc_assert (!m->fs.sp_valid
13862 || m->fs.sp_offset == frame.stack_pointer_offset);
13863
13864 /* The FP must be valid if the frame pointer is present. */
13865 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13866 gcc_assert (!m->fs.fp_valid
13867 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13868
13869 /* We must have *some* valid pointer to the stack frame. */
13870 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13871
13872 /* The DRAP is never valid at this point. */
13873 gcc_assert (!m->fs.drap_valid);
13874
13875 /* See the comment about red zone and frame
13876 pointer usage in ix86_expand_prologue. */
13877 if (frame_pointer_needed && frame.red_zone_size)
13878 emit_insn (gen_memory_blockage ());
13879
13880 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13881 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13882
13883 /* Determine the CFA offset of the end of the red-zone. */
13884 m->fs.red_zone_offset = 0;
13885 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13886 {
13887 /* The red-zone begins below return address and error code in
13888 exception handler. */
13889 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
13890
13891 /* When the register save area is in the aligned portion of
13892 the stack, determine the maximum runtime displacement that
13893 matches up with the aligned frame. */
13894 if (stack_realign_drap)
13895 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13896 + UNITS_PER_WORD);
13897 }
13898
13899 /* Special care must be taken for the normal return case of a function
13900 using eh_return: the eax and edx registers are marked as saved, but
13901 not restored along this path. Adjust the save location to match. */
13902 if (crtl->calls_eh_return && style != 2)
13903 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
13904
13905 /* EH_RETURN requires the use of moves to function properly. */
13906 if (crtl->calls_eh_return)
13907 restore_regs_via_mov = true;
13908 /* SEH requires the use of pops to identify the epilogue. */
13909 else if (TARGET_SEH)
13910 restore_regs_via_mov = false;
13911 /* If we're only restoring one register and sp cannot be used then
13912 use a move instruction to restore the register since it's
13913 less work than reloading sp and popping the register. */
13914 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
13915 restore_regs_via_mov = true;
13916 else if (TARGET_EPILOGUE_USING_MOVE
13917 && cfun->machine->use_fast_prologue_epilogue
13918 && (frame.nregs > 1
13919 || m->fs.sp_offset != frame.reg_save_offset))
13920 restore_regs_via_mov = true;
13921 else if (frame_pointer_needed
13922 && !frame.nregs
13923 && m->fs.sp_offset != frame.reg_save_offset)
13924 restore_regs_via_mov = true;
13925 else if (frame_pointer_needed
13926 && TARGET_USE_LEAVE
13927 && cfun->machine->use_fast_prologue_epilogue
13928 && frame.nregs == 1)
13929 restore_regs_via_mov = true;
13930 else
13931 restore_regs_via_mov = false;
13932
13933 if (restore_regs_via_mov || frame.nsseregs)
13934 {
13935 /* Ensure that the entire register save area is addressable via
13936 the stack pointer, if we will restore SSE regs via sp. */
13937 if (TARGET_64BIT
13938 && m->fs.sp_offset > 0x7fffffff
13939 && sp_valid_at (frame.stack_realign_offset + 1)
13940 && (frame.nsseregs + frame.nregs) != 0)
13941 {
13942 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13943 GEN_INT (m->fs.sp_offset
13944 - frame.sse_reg_save_offset),
13945 style,
13946 m->fs.cfa_reg == stack_pointer_rtx);
13947 }
13948 }
13949
13950 /* If there are any SSE registers to restore, then we have to do it
13951 via moves, since there's obviously no pop for SSE regs. */
13952 if (frame.nsseregs)
13953 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13954 style == 2);
13955
13956 if (m->call_ms2sysv)
13957 {
13958 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
13959
13960 /* We cannot use a tail-call for the stub if:
13961 1. We have to pop incoming args,
13962 2. We have additional int regs to restore, or
13963 3. A sibling call will be the tail-call, or
13964 4. We are emitting an eh_return_internal epilogue.
13965
13966 TODO: Item 4 has not yet been tested!
13967
13968 If any of the above are true, we will call the stub rather than
13969 jump to it. */
13970 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
13971 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
13972 }
13973
13974 /* If using an out-of-line stub that is a tail-call, then... */
13975 if (m->call_ms2sysv && restore_stub_is_tail)
13976 {
13977 /* TODO: paranoid tests. (remove eventually) */
13978 gcc_assert (m->fs.sp_valid);
13979 gcc_assert (!m->fs.sp_realigned);
13980 gcc_assert (!m->fs.fp_valid);
13981 gcc_assert (!m->fs.realigned);
13982 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
13983 gcc_assert (!crtl->drap_reg);
13984 gcc_assert (!frame.nregs);
13985 }
13986 else if (restore_regs_via_mov)
13987 {
13988 rtx t;
13989
13990 if (frame.nregs)
13991 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13992
13993 /* eh_return epilogues need %ecx added to the stack pointer. */
13994 if (style == 2)
13995 {
13996 rtx sa = EH_RETURN_STACKADJ_RTX;
13997 rtx_insn *insn;
13998
13999 /* %ecx can't be used for both DRAP register and eh_return. */
14000 if (crtl->drap_reg)
14001 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
14002
14003 /* regparm nested functions don't work with eh_return. */
14004 gcc_assert (!ix86_static_chain_on_stack);
14005
14006 if (frame_pointer_needed)
14007 {
14008 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
14009 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
14010 emit_insn (gen_rtx_SET (sa, t));
14011
14012 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
14013 insn = emit_move_insn (hard_frame_pointer_rtx, t);
14014
14015 /* Note that we use SA as a temporary CFA, as the return
14016 address is at the proper place relative to it. We
14017 pretend this happens at the FP restore insn because
14018 prior to this insn the FP would be stored at the wrong
14019 offset relative to SA, and after this insn we have no
14020 other reasonable register to use for the CFA. We don't
14021 bother resetting the CFA to the SP for the duration of
14022 the return insn, unless the control flow instrumentation
14023 is done. In this case the SP is used later and we have
14024 to reset CFA to SP. */
14025 add_reg_note (insn, REG_CFA_DEF_CFA,
14026 plus_constant (Pmode, sa, UNITS_PER_WORD));
14027 ix86_add_queued_cfa_restore_notes (insn);
14028 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
14029 RTX_FRAME_RELATED_P (insn) = 1;
14030
14031 m->fs.cfa_reg = sa;
14032 m->fs.cfa_offset = UNITS_PER_WORD;
14033 m->fs.fp_valid = false;
14034
14035 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
14036 const0_rtx, style,
14037 flag_cf_protection);
14038 }
14039 else
14040 {
14041 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
14042 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
14043 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
14044 ix86_add_queued_cfa_restore_notes (insn);
14045
14046 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14047 if (m->fs.cfa_offset != UNITS_PER_WORD)
14048 {
14049 m->fs.cfa_offset = UNITS_PER_WORD;
14050 add_reg_note (insn, REG_CFA_DEF_CFA,
14051 plus_constant (Pmode, stack_pointer_rtx,
14052 UNITS_PER_WORD));
14053 RTX_FRAME_RELATED_P (insn) = 1;
14054 }
14055 }
14056 m->fs.sp_offset = UNITS_PER_WORD;
14057 m->fs.sp_valid = true;
14058 m->fs.sp_realigned = false;
14059 }
14060 }
14061 else
14062 {
14063 /* SEH requires that the function end with (1) a stack adjustment
14064 if necessary, (2) a sequence of pops, and (3) a return or
14065 jump instruction. Prevent insns from the function body from
14066 being scheduled into this sequence. */
14067 if (TARGET_SEH)
14068 {
14069 /* Prevent a catch region from being adjacent to the standard
14070 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
14071 several other flags that would be interesting to test have
14072 been set up yet. */
14073 if (flag_non_call_exceptions)
14074 emit_insn (gen_nops (const1_rtx));
14075 else
14076 emit_insn (gen_blockage ());
14077 }
14078
14079 /* The first step is to deallocate the stack frame so that we can
14080 pop the registers. If the stack pointer was realigned, it needs
14081 to be restored now. Also do this on SEH targets for very large
14082 frames, as the emitted instructions aren't allowed by the ABI
14083 in epilogues. */
14084 if (!m->fs.sp_valid || m->fs.sp_realigned
14085 || (TARGET_SEH
14086 && (m->fs.sp_offset - frame.reg_save_offset
14087 >= SEH_MAX_FRAME_SIZE)))
14088 {
14089 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
14090 GEN_INT (m->fs.fp_offset
14091 - frame.reg_save_offset),
14092 style, false);
14093 }
14094 else if (m->fs.sp_offset != frame.reg_save_offset)
14095 {
14096 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14097 GEN_INT (m->fs.sp_offset
14098 - frame.reg_save_offset),
14099 style,
14100 m->fs.cfa_reg == stack_pointer_rtx);
14101 }
14102
14103 ix86_emit_restore_regs_using_pop ();
14104 }
14105
14106 /* If we used a frame pointer and haven't already got rid of it,
14107 then do so now. */
14108 if (m->fs.fp_valid)
14109 {
14110 /* If the stack pointer is valid and pointing at the frame
14111 pointer store address, then we only need a pop. */
14112 if (sp_valid_at (frame.hfp_save_offset)
14113 && m->fs.sp_offset == frame.hfp_save_offset)
14114 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14115 /* Leave results in shorter dependency chains on CPUs that are
14116 able to grok it fast. */
14117 else if (TARGET_USE_LEAVE
14118 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
14119 || !cfun->machine->use_fast_prologue_epilogue)
14120 ix86_emit_leave (NULL);
14121 else
14122 {
14123 pro_epilogue_adjust_stack (stack_pointer_rtx,
14124 hard_frame_pointer_rtx,
14125 const0_rtx, style, !using_drap);
14126 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14127 }
14128 }
14129
14130 if (using_drap)
14131 {
14132 int param_ptr_offset = UNITS_PER_WORD;
14133 rtx_insn *insn;
14134
14135 gcc_assert (stack_realign_drap);
14136
14137 if (ix86_static_chain_on_stack)
14138 param_ptr_offset += UNITS_PER_WORD;
14139 if (!call_used_regs[REGNO (crtl->drap_reg)])
14140 param_ptr_offset += UNITS_PER_WORD;
14141
14142 insn = emit_insn (gen_rtx_SET
14143 (stack_pointer_rtx,
14144 gen_rtx_PLUS (Pmode,
14145 crtl->drap_reg,
14146 GEN_INT (-param_ptr_offset))));
14147 m->fs.cfa_reg = stack_pointer_rtx;
14148 m->fs.cfa_offset = param_ptr_offset;
14149 m->fs.sp_offset = param_ptr_offset;
14150 m->fs.realigned = false;
14151
14152 add_reg_note (insn, REG_CFA_DEF_CFA,
14153 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14154 GEN_INT (param_ptr_offset)));
14155 RTX_FRAME_RELATED_P (insn) = 1;
14156
14157 if (!call_used_regs[REGNO (crtl->drap_reg)])
14158 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
14159 }
14160
14161 /* At this point the stack pointer must be valid, and we must have
14162 restored all of the registers. We may not have deallocated the
14163 entire stack frame. We've delayed this until now because it may
14164 be possible to merge the local stack deallocation with the
14165 deallocation forced by ix86_static_chain_on_stack. */
14166 gcc_assert (m->fs.sp_valid);
14167 gcc_assert (!m->fs.sp_realigned);
14168 gcc_assert (!m->fs.fp_valid);
14169 gcc_assert (!m->fs.realigned);
14170 if (m->fs.sp_offset != UNITS_PER_WORD)
14171 {
14172 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14173 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
14174 style, true);
14175 }
14176 else
14177 ix86_add_queued_cfa_restore_notes (get_last_insn ());
14178
14179 /* Sibcall epilogues don't want a return instruction. */
14180 if (style == 0)
14181 {
14182 m->fs = frame_state_save;
14183 return;
14184 }
14185
14186 if (cfun->machine->func_type != TYPE_NORMAL)
14187 emit_jump_insn (gen_interrupt_return ());
14188 else if (crtl->args.pops_args && crtl->args.size)
14189 {
14190 rtx popc = GEN_INT (crtl->args.pops_args);
14191
14192 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
14193 address, do an explicit add, and jump indirectly to the caller. */
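/* (Illustrative sketch added for clarity; the assembly is not taken from
   this file.)  For pops_args == 8 the epilogue can end in a plain
   "ret $8", but the immediate of "ret imm16" is only 16 bits wide, so
   for pops_args == 0x10000 the sequence below amounts to:
       popl  %ecx              # return address into %ecx
       addl  $0x10000, %esp    # pop the arguments explicitly
       jmp   *%ecx             # return to the caller  */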
14194
14195 if (crtl->args.pops_args >= 65536)
14196 {
14197 rtx ecx = gen_rtx_REG (SImode, CX_REG);
14198 rtx_insn *insn;
14199
14200 /* There is no "pascal" calling convention in any 64bit ABI. */
14201 gcc_assert (!TARGET_64BIT);
14202
14203 insn = emit_insn (gen_pop (ecx));
14204 m->fs.cfa_offset -= UNITS_PER_WORD;
14205 m->fs.sp_offset -= UNITS_PER_WORD;
14206
14207 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14208 x = gen_rtx_SET (stack_pointer_rtx, x);
14209 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14210 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14211 RTX_FRAME_RELATED_P (insn) = 1;
14212
14213 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14214 popc, -1, true);
14215 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14216 }
14217 else
14218 emit_jump_insn (gen_simple_return_pop_internal (popc));
14219 }
14220 else if (!m->call_ms2sysv || !restore_stub_is_tail)
14221 {
14222 /* In the case of a return from EH, a simple return cannot be used,
14223 as the return address will be compared with a shadow stack
14224 return address. Use an indirect jump instead. */
14225 if (style == 2 && flag_cf_protection)
14226 {
14227 /* Register used in indirect jump must be in word_mode. But
14228 Pmode may not be the same as word_mode for x32. */
14229 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
14230 rtx_insn *insn;
14231
14232 insn = emit_insn (gen_pop (ecx));
14233 m->fs.cfa_offset -= UNITS_PER_WORD;
14234 m->fs.sp_offset -= UNITS_PER_WORD;
14235
14236 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14237 x = gen_rtx_SET (stack_pointer_rtx, x);
14238 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14239 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14240 RTX_FRAME_RELATED_P (insn) = 1;
14241
14242 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14243 }
14244 else
14245 emit_jump_insn (gen_simple_return_internal ());
14246 }
14247
14248 /* Restore the state back to the state from the prologue,
14249 so that it's correct for the next epilogue. */
14250 m->fs = frame_state_save;
14251 }
14252
14253 /* Reset global state from the function's potential modifications. */
14254
14255 static void
14256 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
14257 {
14258 if (pic_offset_table_rtx
14259 && !ix86_use_pseudo_pic_reg ())
14260 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
14261
14262 if (TARGET_MACHO)
14263 {
14264 rtx_insn *insn = get_last_insn ();
14265 rtx_insn *deleted_debug_label = NULL;
14266
14267 /* Mach-O doesn't support labels at the end of objects, so if
14268 it looks like we might want one, take special action.
14269 First, collect any sequence of deleted debug labels. */
14270 while (insn
14271 && NOTE_P (insn)
14272 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
14273 {
14274 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
14275 notes; instead set their CODE_LABEL_NUMBER to -1,
14276 otherwise there would be code generation differences
14277 between -g and -g0. */
14278 if (NOTE_P (insn) && NOTE_KIND (insn)
14279 == NOTE_INSN_DELETED_DEBUG_LABEL)
14280 deleted_debug_label = insn;
14281 insn = PREV_INSN (insn);
14282 }
14283
14284 /* If we have:
14285 label:
14286 barrier
14287 then this needs to be detected, so skip past the barrier. */
14288
14289 if (insn && BARRIER_P (insn))
14290 insn = PREV_INSN (insn);
14291
14292 /* Up to now we've only seen notes or barriers. */
14293 if (insn)
14294 {
14295 if (LABEL_P (insn)
14296 || (NOTE_P (insn)
14297 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
14298 /* Trailing label. */
14299 fputs ("\tnop\n", file);
14300 else if (cfun && ! cfun->is_thunk)
14301 {
14302 /* See if we have a completely empty function body, skipping
14303 the special case of the picbase thunk emitted as asm. */
14304 while (insn && ! INSN_P (insn))
14305 insn = PREV_INSN (insn);
14306 /* If we don't find any insns, we've got an empty function body;
14307 i.e. completely empty - without a return or branch. This is
14308 taken as the case where a function body has been removed
14309 because it contains an inline __builtin_unreachable(). GCC
14310 declares that reaching __builtin_unreachable() means UB so
14311 we're not obliged to do anything special; however, we want
14312 non-zero-sized function bodies. To meet this, and help the
14313 user out, let's trap the case. */
14314 if (insn == NULL)
14315 fputs ("\tud2\n", file);
14316 }
14317 }
14318 else if (deleted_debug_label)
14319 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
14320 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14321 CODE_LABEL_NUMBER (insn) = -1;
14322 }
14323 }
14324
14325 /* Return a scratch register to use in the split stack prologue. The
14326 split stack prologue is used for -fsplit-stack. It is the first
14327 instructions in the function, even before the regular prologue.
14328 The scratch register can be any caller-saved register which is not
14329 used for parameters or for the static chain. */
14330
14331 static unsigned int
14332 split_stack_prologue_scratch_regno (void)
14333 {
14334 if (TARGET_64BIT)
14335 return R11_REG;
14336 else
14337 {
14338 bool is_fastcall, is_thiscall;
14339 int regparm;
14340
14341 is_fastcall = (lookup_attribute ("fastcall",
14342 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14343 != NULL);
14344 is_thiscall = (lookup_attribute ("thiscall",
14345 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14346 != NULL);
14347 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14348
14349 if (is_fastcall)
14350 {
14351 if (DECL_STATIC_CHAIN (cfun->decl))
14352 {
14353 sorry ("-fsplit-stack does not support fastcall with "
14354 "nested function");
14355 return INVALID_REGNUM;
14356 }
14357 return AX_REG;
14358 }
14359 else if (is_thiscall)
14360 {
14361 if (!DECL_STATIC_CHAIN (cfun->decl))
14362 return DX_REG;
14363 return AX_REG;
14364 }
14365 else if (regparm < 3)
14366 {
14367 if (!DECL_STATIC_CHAIN (cfun->decl))
14368 return CX_REG;
14369 else
14370 {
14371 if (regparm >= 2)
14372 {
14373 sorry ("-fsplit-stack does not support 2 register "
14374 "parameters for a nested function");
14375 return INVALID_REGNUM;
14376 }
14377 return DX_REG;
14378 }
14379 }
14380 else
14381 {
14382 /* FIXME: We could make this work by pushing a register
14383 around the addition and comparison. */
14384 sorry ("-fsplit-stack does not support 3 register parameters");
14385 return INVALID_REGNUM;
14386 }
14387 }
14388 }
14389
14390 /* A SYMBOL_REF for the function which allocates new stack space for
14391 -fsplit-stack. */
14392
14393 static GTY(()) rtx split_stack_fn;
14394
14395 /* A SYMBOL_REF for the __morestack_large_model function, used with
14396 the large code model. */
14397
14398 static GTY(()) rtx split_stack_fn_large;
14399
14400 /* Return location of the stack guard value in the TLS block. */
14401
14402 rtx
14403 ix86_split_stack_guard (void)
14404 {
14405 int offset;
14406 addr_space_t as = DEFAULT_TLS_SEG_REG;
14407 rtx r;
14408
14409 gcc_assert (flag_split_stack);
14410
14411 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14412 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14413 #else
14414 gcc_unreachable ();
14415 #endif
14416
14417 r = GEN_INT (offset);
14418 r = gen_const_mem (Pmode, r);
14419 set_mem_addr_space (r, as);
14420
14421 return r;
14422 }
14423
14424 /* Handle -fsplit-stack. These are the first instructions in the
14425 function, even before the regular prologue. */
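/* (Illustrative sketch added for clarity; the exact assembly is an
   assumption about typical output, not taken from this file.)  On
   x86-64, when the frame fits below SPLIT_STACK_AVAILABLE, the
   expansion below is roughly:
       cmpq  %fs:OFFSET, %rsp    # OFFSET = TARGET_THREAD_SPLIT_STACK_OFFSET
       jae   .Lenough            # enough stack: skip __morestack
       movq  $FRAME, %r10        # requested frame size
       movq  $ARGS, %r11         # bytes of incoming arguments to copy
       callq __morestack
       ret                       # see morestack.S; control resumes on the new stack
   .Lenough:
       ... regular prologue ...  */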
14426
14427 void
14428 ix86_expand_split_stack_prologue (void)
14429 {
14430 HOST_WIDE_INT allocate;
14431 unsigned HOST_WIDE_INT args_size;
14432 rtx_code_label *label;
14433 rtx limit, current, allocate_rtx, call_insn, call_fusage;
14434 rtx scratch_reg = NULL_RTX;
14435 rtx_code_label *varargs_label = NULL;
14436 rtx fn;
14437
14438 gcc_assert (flag_split_stack && reload_completed);
14439
14440 ix86_finalize_stack_frame_flags ();
14441 struct ix86_frame &frame = cfun->machine->frame;
14442 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
14443
14444 /* This is the label we will branch to if we have enough stack
14445 space. We expect the basic block reordering pass to reverse this
14446 branch if optimizing, so that we branch in the unlikely case. */
14447 label = gen_label_rtx ();
14448
14449 /* We need to compare the stack pointer minus the frame size with
14450 the stack boundary in the TCB. The stack boundary always gives
14451 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
14452 can compare directly. Otherwise we need to do an addition. */
14453
14454 limit = ix86_split_stack_guard ();
14455
14456 if (allocate < SPLIT_STACK_AVAILABLE)
14457 current = stack_pointer_rtx;
14458 else
14459 {
14460 unsigned int scratch_regno;
14461 rtx offset;
14462
14463 /* We need a scratch register to hold the stack pointer minus
14464 the required frame size. Since this is the very start of the
14465 function, the scratch register can be any caller-saved
14466 register which is not used for parameters. */
14467 offset = GEN_INT (- allocate);
14468 scratch_regno = split_stack_prologue_scratch_regno ();
14469 if (scratch_regno == INVALID_REGNUM)
14470 return;
14471 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14472 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
14473 {
14474 /* We don't use ix86_gen_add3 in this case because it will
14475 want to split to lea, but when not optimizing the insn
14476 will not be split after this point. */
14477 emit_insn (gen_rtx_SET (scratch_reg,
14478 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14479 offset)));
14480 }
14481 else
14482 {
14483 emit_move_insn (scratch_reg, offset);
14484 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
14485 stack_pointer_rtx));
14486 }
14487 current = scratch_reg;
14488 }
14489
14490 ix86_expand_branch (GEU, current, limit, label);
14491 rtx_insn *jump_insn = get_last_insn ();
14492 JUMP_LABEL (jump_insn) = label;
14493
14494 /* Mark the jump as very likely to be taken. */
14495 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
14496
14497 if (split_stack_fn == NULL_RTX)
14498 {
14499 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
14500 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
14501 }
14502 fn = split_stack_fn;
14503
14504 /* Get more stack space. We pass in the desired stack space and the
14505 size of the arguments to copy to the new stack. In 32-bit mode
14506 we push the parameters; __morestack will return on a new stack
14507 anyhow. In 64-bit mode we pass the parameters in r10 and
14508 r11. */
14509 allocate_rtx = GEN_INT (allocate);
14510 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
14511 call_fusage = NULL_RTX;
14512 rtx pop = NULL_RTX;
14513 if (TARGET_64BIT)
14514 {
14515 rtx reg10, reg11;
14516
14517 reg10 = gen_rtx_REG (Pmode, R10_REG);
14518 reg11 = gen_rtx_REG (Pmode, R11_REG);
14519
14520 /* If this function uses a static chain, it will be in %r10.
14521 Preserve it across the call to __morestack. */
14522 if (DECL_STATIC_CHAIN (cfun->decl))
14523 {
14524 rtx rax;
14525
14526 rax = gen_rtx_REG (word_mode, AX_REG);
14527 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
14528 use_reg (&call_fusage, rax);
14529 }
14530
14531 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
14532 && !TARGET_PECOFF)
14533 {
14534 HOST_WIDE_INT argval;
14535
14536 gcc_assert (Pmode == DImode);
14537 /* When using the large model we need to load the address
14538 into a register, and we've run out of registers. So we
14539 switch to a different calling convention, and we call a
14540 different function: __morestack_large. We pass the
14541 argument size in the upper 32 bits of r10 and pass the
14542 frame size in the lower 32 bits. */
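/* (Worked instance added for clarity; the values are hypothetical.)
   With args_size == 0x20 and allocate == 0x1000, the argval computed
   below is 0x0000002000001000: the argument size lands in bits 63..32
   of %r10 and the requested frame size in bits 31..0.  */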
14543 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
14544 gcc_assert ((args_size & 0xffffffff) == args_size);
14545
14546 if (split_stack_fn_large == NULL_RTX)
14547 {
14548 split_stack_fn_large =
14549 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
14550 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
14551 }
14552 if (ix86_cmodel == CM_LARGE_PIC)
14553 {
14554 rtx_code_label *label;
14555 rtx x;
14556
14557 label = gen_label_rtx ();
14558 emit_label (label);
14559 LABEL_PRESERVE_P (label) = 1;
14560 emit_insn (gen_set_rip_rex64 (reg10, label));
14561 emit_insn (gen_set_got_offset_rex64 (reg11, label));
14562 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
14563 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
14564 UNSPEC_GOT);
14565 x = gen_rtx_CONST (Pmode, x);
14566 emit_move_insn (reg11, x);
14567 x = gen_rtx_PLUS (Pmode, reg10, reg11);
14568 x = gen_const_mem (Pmode, x);
14569 emit_move_insn (reg11, x);
14570 }
14571 else
14572 emit_move_insn (reg11, split_stack_fn_large);
14573
14574 fn = reg11;
14575
14576 argval = ((args_size << 16) << 16) + allocate;
14577 emit_move_insn (reg10, GEN_INT (argval));
14578 }
14579 else
14580 {
14581 emit_move_insn (reg10, allocate_rtx);
14582 emit_move_insn (reg11, GEN_INT (args_size));
14583 use_reg (&call_fusage, reg11);
14584 }
14585
14586 use_reg (&call_fusage, reg10);
14587 }
14588 else
14589 {
14590 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
14591 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
14592 insn = emit_insn (gen_push (allocate_rtx));
14593 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
14594 pop = GEN_INT (2 * UNITS_PER_WORD);
14595 }
14596 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
14597 GEN_INT (UNITS_PER_WORD), constm1_rtx,
14598 pop, false);
14599 add_function_usage_to (call_insn, call_fusage);
14600 if (!TARGET_64BIT)
14601 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
14602 /* Indicate that this function can't jump to non-local gotos. */
14603 make_reg_eh_region_note_nothrow_nononlocal (as_a <rtx_insn *> (call_insn));
14604
14605 /* In order to make call/return prediction work right, we now need
14606 to execute a return instruction. See
14607 libgcc/config/i386/morestack.S for the details on how this works.
14608
14609 For flow purposes gcc must not see this as a return
14610 instruction--we need control flow to continue at the subsequent
14611 label. Therefore, we use an unspec. */
14612 gcc_assert (crtl->args.pops_args < 65536);
14613 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
14614
14615 /* If we are in 64-bit mode and this function uses a static chain,
14616 we saved %r10 in %rax before calling __morestack. */
14617 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
14618 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14619 gen_rtx_REG (word_mode, AX_REG));
14620
14621 /* If this function calls va_start, we need to store a pointer to
14622 the arguments on the old stack, because they may not all have been
14623 copied to the new stack. At this point the old stack can be
14624 found at the frame pointer value used by __morestack, because
14625 __morestack has set that up before calling back to us. Here we
14626 store that pointer in a scratch register, and in
14627 ix86_expand_prologue we store the scratch register in a stack
14628 slot. */
14629 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14630 {
14631 unsigned int scratch_regno;
14632 rtx frame_reg;
14633 int words;
14634
14635 scratch_regno = split_stack_prologue_scratch_regno ();
14636 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14637 frame_reg = gen_rtx_REG (Pmode, BP_REG);
14638
14639 /* 64-bit:
14640 fp -> old fp value
14641 return address within this function
14642 return address of caller of this function
14643 stack arguments
14644 So we add three words to get to the stack arguments.
14645
14646 32-bit:
14647 fp -> old fp value
14648 return address within this function
14649 first argument to __morestack
14650 second argument to __morestack
14651 return address of caller of this function
14652 stack arguments
14653 So we add five words to get to the stack arguments.
14654 */
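/* (Worked instance added for clarity.)  On x86-64 this makes
   words == 3, so the SET below computes scratch = %rbp + 24, the
   address of the first stack argument; on 32-bit it is %ebp + 20.  */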
14655 words = TARGET_64BIT ? 3 : 5;
14656 emit_insn (gen_rtx_SET (scratch_reg,
14657 gen_rtx_PLUS (Pmode, frame_reg,
14658 GEN_INT (words * UNITS_PER_WORD))));
14659
14660 varargs_label = gen_label_rtx ();
14661 emit_jump_insn (gen_jump (varargs_label));
14662 JUMP_LABEL (get_last_insn ()) = varargs_label;
14663
14664 emit_barrier ();
14665 }
14666
14667 emit_label (label);
14668 LABEL_NUSES (label) = 1;
14669
14670 /* If this function calls va_start, we now have to set the scratch
14671 register for the case where we do not call __morestack. In this
14672 case we need to set it based on the stack pointer. */
14673 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14674 {
14675 emit_insn (gen_rtx_SET (scratch_reg,
14676 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14677 GEN_INT (UNITS_PER_WORD))));
14678
14679 emit_label (varargs_label);
14680 LABEL_NUSES (varargs_label) = 1;
14681 }
14682 }
14683
14684 /* We may have to tell the dataflow pass that the split stack prologue
14685 is initializing a scratch register. */
14686
14687 static void
14688 ix86_live_on_entry (bitmap regs)
14689 {
14690 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14691 {
14692 gcc_assert (flag_split_stack);
14693 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14694 }
14695 }
14696 \f
14697 /* Extract the parts of an RTL expression that is a valid memory address
14698 for an instruction. Return 0 if the structure of the address is
14699 grossly off. Return -1 if the address contains ASHIFT, so it is not
14700 strictly valid, but is still used for computing the length of an lea instruction. */
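/* (Illustrative example added for clarity; the RTL below is a sketch,
   not taken from this file.)  For the address "8(%ebx,%esi,4)", i.e.
       (plus:SI (plus:SI (mult:SI (reg:SI si) (const_int 4))
                         (reg:SI bx))
                (const_int 8))
   this routine fills OUT with base = %ebx, index = %esi, scale = 4,
   disp = 8, seg = ADDR_SPACE_GENERIC and returns 1.  */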
14701
14702 int
14703 ix86_decompose_address (rtx addr, struct ix86_address *out)
14704 {
14705 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14706 rtx base_reg, index_reg;
14707 HOST_WIDE_INT scale = 1;
14708 rtx scale_rtx = NULL_RTX;
14709 rtx tmp;
14710 int retval = 1;
14711 addr_space_t seg = ADDR_SPACE_GENERIC;
14712
14713 /* Allow zero-extended SImode addresses,
14714 they will be emitted with addr32 prefix. */
14715 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14716 {
14717 if (GET_CODE (addr) == ZERO_EXTEND
14718 && GET_MODE (XEXP (addr, 0)) == SImode)
14719 {
14720 addr = XEXP (addr, 0);
14721 if (CONST_INT_P (addr))
14722 return 0;
14723 }
14724 else if (GET_CODE (addr) == AND
14725 && const_32bit_mask (XEXP (addr, 1), DImode))
14726 {
14727 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
14728 if (addr == NULL_RTX)
14729 return 0;
14730
14731 if (CONST_INT_P (addr))
14732 return 0;
14733 }
14734 }
14735
14736 /* Allow SImode subregs of DImode addresses,
14737 they will be emitted with addr32 prefix. */
14738 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14739 {
14740 if (SUBREG_P (addr)
14741 && GET_MODE (SUBREG_REG (addr)) == DImode)
14742 {
14743 addr = SUBREG_REG (addr);
14744 if (CONST_INT_P (addr))
14745 return 0;
14746 }
14747 }
14748
14749 if (REG_P (addr))
14750 base = addr;
14751 else if (SUBREG_P (addr))
14752 {
14753 if (REG_P (SUBREG_REG (addr)))
14754 base = addr;
14755 else
14756 return 0;
14757 }
14758 else if (GET_CODE (addr) == PLUS)
14759 {
14760 rtx addends[4], op;
14761 int n = 0, i;
14762
14763 op = addr;
14764 do
14765 {
14766 if (n >= 4)
14767 return 0;
14768 addends[n++] = XEXP (op, 1);
14769 op = XEXP (op, 0);
14770 }
14771 while (GET_CODE (op) == PLUS);
14772 if (n >= 4)
14773 return 0;
14774 addends[n] = op;
14775
14776 for (i = n; i >= 0; --i)
14777 {
14778 op = addends[i];
14779 switch (GET_CODE (op))
14780 {
14781 case MULT:
14782 if (index)
14783 return 0;
14784 index = XEXP (op, 0);
14785 scale_rtx = XEXP (op, 1);
14786 break;
14787
14788 case ASHIFT:
14789 if (index)
14790 return 0;
14791 index = XEXP (op, 0);
14792 tmp = XEXP (op, 1);
14793 if (!CONST_INT_P (tmp))
14794 return 0;
14795 scale = INTVAL (tmp);
14796 if ((unsigned HOST_WIDE_INT) scale > 3)
14797 return 0;
14798 scale = 1 << scale;
14799 break;
14800
14801 case ZERO_EXTEND:
14802 op = XEXP (op, 0);
14803 if (GET_CODE (op) != UNSPEC)
14804 return 0;
14805 /* FALLTHRU */
14806
14807 case UNSPEC:
14808 if (XINT (op, 1) == UNSPEC_TP
14809 && TARGET_TLS_DIRECT_SEG_REFS
14810 && seg == ADDR_SPACE_GENERIC)
14811 seg = DEFAULT_TLS_SEG_REG;
14812 else
14813 return 0;
14814 break;
14815
14816 case SUBREG:
14817 if (!REG_P (SUBREG_REG (op)))
14818 return 0;
14819 /* FALLTHRU */
14820
14821 case REG:
14822 if (!base)
14823 base = op;
14824 else if (!index)
14825 index = op;
14826 else
14827 return 0;
14828 break;
14829
14830 case CONST:
14831 case CONST_INT:
14832 case SYMBOL_REF:
14833 case LABEL_REF:
14834 if (disp)
14835 return 0;
14836 disp = op;
14837 break;
14838
14839 default:
14840 return 0;
14841 }
14842 }
14843 }
14844 else if (GET_CODE (addr) == MULT)
14845 {
14846 index = XEXP (addr, 0); /* index*scale */
14847 scale_rtx = XEXP (addr, 1);
14848 }
14849 else if (GET_CODE (addr) == ASHIFT)
14850 {
14851 /* We're called for lea too, which implements ashift on occasion. */
14852 index = XEXP (addr, 0);
14853 tmp = XEXP (addr, 1);
14854 if (!CONST_INT_P (tmp))
14855 return 0;
14856 scale = INTVAL (tmp);
14857 if ((unsigned HOST_WIDE_INT) scale > 3)
14858 return 0;
14859 scale = 1 << scale;
14860 retval = -1;
14861 }
14862 else
14863 disp = addr; /* displacement */
14864
14865 if (index)
14866 {
14867 if (REG_P (index))
14868 ;
14869 else if (SUBREG_P (index)
14870 && REG_P (SUBREG_REG (index)))
14871 ;
14872 else
14873 return 0;
14874 }
14875
14876 /* Extract the integral value of scale. */
14877 if (scale_rtx)
14878 {
14879 if (!CONST_INT_P (scale_rtx))
14880 return 0;
14881 scale = INTVAL (scale_rtx);
14882 }
14883
14884 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14885 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14886
14887 /* Avoid useless 0 displacement. */
14888 if (disp == const0_rtx && (base || index))
14889 disp = NULL_RTX;
14890
14891 /* Allow arg pointer and stack pointer as index if there is no scaling. */
14892 if (base_reg && index_reg && scale == 1
14893 && (REGNO (index_reg) == ARG_POINTER_REGNUM
14894 || REGNO (index_reg) == FRAME_POINTER_REGNUM
14895 || REGNO (index_reg) == SP_REG))
14896 {
14897 std::swap (base, index);
14898 std::swap (base_reg, index_reg);
14899 }
14900
14901 /* Special case: %ebp cannot be encoded as a base without a displacement.
14902 Similarly %r13. */
14903 if (!disp && base_reg
14904 && (REGNO (base_reg) == ARG_POINTER_REGNUM
14905 || REGNO (base_reg) == FRAME_POINTER_REGNUM
14906 || REGNO (base_reg) == BP_REG
14907 || REGNO (base_reg) == R13_REG))
14908 disp = const0_rtx;
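/* (Encoding background added for clarity; an explanatory note, not
   original source commentary: with mod == 00 the %ebp/%r13 base
   encodings are reused for the disp32 / RIP-relative forms, so a plain
   [%ebp] access has to be emitted as [%ebp+0] with a zero
   displacement.)  */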
14909
14910 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
14911 Avoid this by transforming to [%esi+0].
14912 Reload calls address legitimization without cfun defined, so we need
14913 to test cfun for being non-NULL. */
14914 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14915 && base_reg && !index_reg && !disp
14916 && REGNO (base_reg) == SI_REG)
14917 disp = const0_rtx;
14918
14919 /* Special case: encode reg+reg instead of reg*2. */
14920 if (!base && index && scale == 2)
14921 base = index, base_reg = index_reg, scale = 1;
14922
14923 /* Special case: scaling cannot be encoded without base or displacement. */
14924 if (!base && !disp && index && scale != 1)
14925 disp = const0_rtx;
14926
14927 out->base = base;
14928 out->index = index;
14929 out->disp = disp;
14930 out->scale = scale;
14931 out->seg = seg;
14932
14933 return retval;
14934 }
14935 \f
14936 /* Return cost of the memory address x.
14937 For i386, it is better to use a complex address than let gcc copy
14938 the address into a reg and make a new pseudo. But not if the address
14939 requires two regs - that would mean more pseudos with longer
14940 lifetimes. */
14941 static int
14942 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14943 {
14944 struct ix86_address parts;
14945 int cost = 1;
14946 int ok = ix86_decompose_address (x, &parts);
14947
14948 gcc_assert (ok);
14949
14950 if (parts.base && SUBREG_P (parts.base))
14951 parts.base = SUBREG_REG (parts.base);
14952 if (parts.index && SUBREG_P (parts.index))
14953 parts.index = SUBREG_REG (parts.index);
14954
14955 /* Attempt to minimize the number of registers in the address by increasing
14956 the address cost for each register used. We don't increase the address
14957 cost for "pic_offset_table_rtx". When a memop involving "pic_offset_table_rtx"
14958 is not invariant itself, it most likely means that the base or index is not
14959 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14960 which is not profitable for x86. */
14961 if (parts.base
14962 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14963 && (current_pass->type == GIMPLE_PASS
14964 || !pic_offset_table_rtx
14965 || !REG_P (parts.base)
14966 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
14967 cost++;
14968
14969 if (parts.index
14970 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14971 && (current_pass->type == GIMPLE_PASS
14972 || !pic_offset_table_rtx
14973 || !REG_P (parts.index)
14974 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14975 cost++;
14976
14977 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
14978 since its predecode logic can't detect the length of such instructions
14979 and decoding degenerates to the vector decoder. Increase the cost of such
14980 addresses here. The penalty is at least 2 cycles. It may be worthwhile
14981 to split such addresses or even to refuse them entirely.
14982
14983 The following addressing modes are affected:
14984 [base+scale*index]
14985 [scale*index+disp]
14986 [base+index]
14987
14988 The first and last cases may be avoidable by explicitly coding the zero into
14989 the memory address, but I don't have an AMD-K6 machine handy to check this
14990 theory. */
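/* (Worked instance added for clarity; the address is hypothetical and
   assumes neither pseudo is the PIC register.)  For an address of the
   form (plus (reg P1) (reg P2)) the register checks above yield a cost
   of 1 + 1 (base) + 1 (index) = 3, and the K6 check below adds another
   10 because the displacement is missing and the scale is 1.  */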
14991
14992 if (TARGET_K6
14993 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14994 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14995 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14996 cost += 10;
14997
14998 return cost;
14999 }
15000 \f
15001 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
15002 this is used to form addresses of local data when -fPIC is in
15003 use. */
15004
15005 static bool
15006 darwin_local_data_pic (rtx disp)
15007 {
15008 return (GET_CODE (disp) == UNSPEC
15009 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
15010 }
15011
15012 /* True if operand X should be loaded from GOT. */
15013
15014 bool
15015 ix86_force_load_from_GOT_p (rtx x)
15016 {
15017 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15018 && !TARGET_PECOFF && !TARGET_MACHO
15019 && !flag_plt && !flag_pic
15020 && ix86_cmodel != CM_LARGE
15021 && GET_CODE (x) == SYMBOL_REF
15022 && SYMBOL_REF_FUNCTION_P (x)
15023 && !SYMBOL_REF_LOCAL_P (x));
15024 }
15025
15026 /* Determine if a given RTX is a valid constant. We already know this
15027 satisfies CONSTANT_P. */
15028
15029 static bool
15030 ix86_legitimate_constant_p (machine_mode mode, rtx x)
15031 {
15032 /* Pointer bounds constants are not valid. */
15033 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
15034 return false;
15035
15036 switch (GET_CODE (x))
15037 {
15038 case CONST:
15039 x = XEXP (x, 0);
15040
15041 if (GET_CODE (x) == PLUS)
15042 {
15043 if (!CONST_INT_P (XEXP (x, 1)))
15044 return false;
15045 x = XEXP (x, 0);
15046 }
15047
15048 if (TARGET_MACHO && darwin_local_data_pic (x))
15049 return true;
15050
15051 /* Only some unspecs are valid as "constants". */
15052 if (GET_CODE (x) == UNSPEC)
15053 switch (XINT (x, 1))
15054 {
15055 case UNSPEC_GOT:
15056 case UNSPEC_GOTOFF:
15057 case UNSPEC_PLTOFF:
15058 return TARGET_64BIT;
15059 case UNSPEC_TPOFF:
15060 case UNSPEC_NTPOFF:
15061 x = XVECEXP (x, 0, 0);
15062 return (GET_CODE (x) == SYMBOL_REF
15063 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15064 case UNSPEC_DTPOFF:
15065 x = XVECEXP (x, 0, 0);
15066 return (GET_CODE (x) == SYMBOL_REF
15067 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
15068 default:
15069 return false;
15070 }
15071
15072 /* We must have drilled down to a symbol. */
15073 if (GET_CODE (x) == LABEL_REF)
15074 return true;
15075 if (GET_CODE (x) != SYMBOL_REF)
15076 return false;
15077 /* FALLTHRU */
15078
15079 case SYMBOL_REF:
15080 /* TLS symbols are never valid. */
15081 if (SYMBOL_REF_TLS_MODEL (x))
15082 return false;
15083
15084 /* DLLIMPORT symbols are never valid. */
15085 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15086 && SYMBOL_REF_DLLIMPORT_P (x))
15087 return false;
15088
15089 #if TARGET_MACHO
15090 /* mdynamic-no-pic */
15091 if (MACHO_DYNAMIC_NO_PIC_P)
15092 return machopic_symbol_defined_p (x);
15093 #endif
15094
15095 /* External function address should be loaded
15096 via the GOT slot to avoid PLT. */
15097 if (ix86_force_load_from_GOT_p (x))
15098 return false;
15099
15100 break;
15101
15102 CASE_CONST_SCALAR_INT:
15103 switch (mode)
15104 {
15105 case E_TImode:
15106 if (TARGET_64BIT)
15107 return true;
15108 /* FALLTHRU */
15109 case E_OImode:
15110 case E_XImode:
15111 if (!standard_sse_constant_p (x, mode))
15112 return false;
15113 default:
15114 break;
15115 }
15116 break;
15117
15118 case CONST_VECTOR:
15119 if (!standard_sse_constant_p (x, mode))
15120 return false;
15121
15122 default:
15123 break;
15124 }
15125
15126 /* Otherwise we handle everything else in the move patterns. */
15127 return true;
15128 }
15129
15130 /* Determine if it's legal to put X into the constant pool. This
15131 is not possible for the address of thread-local symbols, which
15132 is checked above. */
15133
15134 static bool
15135 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
15136 {
15137 /* We can put any immediate constant in memory. */
15138 switch (GET_CODE (x))
15139 {
15140 CASE_CONST_ANY:
15141 return false;
15142
15143 default:
15144 break;
15145 }
15146
15147 return !ix86_legitimate_constant_p (mode, x);
15148 }
15149
15150 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
15151 otherwise zero. */
15152
15153 static bool
15154 is_imported_p (rtx x)
15155 {
15156 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
15157 || GET_CODE (x) != SYMBOL_REF)
15158 return false;
15159
15160 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
15161 }
15162
15163
15164 /* Nonzero if the constant value X is a legitimate general operand
15165 when generating PIC code. It is given that flag_pic is on and
15166 that X satisfies CONSTANT_P. */
15167
15168 bool
15169 legitimate_pic_operand_p (rtx x)
15170 {
15171 rtx inner;
15172
15173 switch (GET_CODE (x))
15174 {
15175 case CONST:
15176 inner = XEXP (x, 0);
15177 if (GET_CODE (inner) == PLUS
15178 && CONST_INT_P (XEXP (inner, 1)))
15179 inner = XEXP (inner, 0);
15180
15181 /* Only some unspecs are valid as "constants". */
15182 if (GET_CODE (inner) == UNSPEC)
15183 switch (XINT (inner, 1))
15184 {
15185 case UNSPEC_GOT:
15186 case UNSPEC_GOTOFF:
15187 case UNSPEC_PLTOFF:
15188 return TARGET_64BIT;
15189 case UNSPEC_TPOFF:
15190 x = XVECEXP (inner, 0, 0);
15191 return (GET_CODE (x) == SYMBOL_REF
15192 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15193 case UNSPEC_MACHOPIC_OFFSET:
15194 return legitimate_pic_address_disp_p (x);
15195 default:
15196 return false;
15197 }
15198 /* FALLTHRU */
15199
15200 case SYMBOL_REF:
15201 case LABEL_REF:
15202 return legitimate_pic_address_disp_p (x);
15203
15204 default:
15205 return true;
15206 }
15207 }
15208
15209 /* Determine if a given CONST RTX is a valid memory displacement
15210 in PIC mode. */
15211
15212 bool
15213 legitimate_pic_address_disp_p (rtx disp)
15214 {
15215 bool saw_plus;
15216
15217 /* In 64bit mode we can allow direct addresses of symbols and labels
15218 when they are not dynamic symbols. */
15219 if (TARGET_64BIT)
15220 {
15221 rtx op0 = disp, op1;
15222
15223 switch (GET_CODE (disp))
15224 {
15225 case LABEL_REF:
15226 return true;
15227
15228 case CONST:
15229 if (GET_CODE (XEXP (disp, 0)) != PLUS)
15230 break;
15231 op0 = XEXP (XEXP (disp, 0), 0);
15232 op1 = XEXP (XEXP (disp, 0), 1);
15233 if (!CONST_INT_P (op1))
15234 break;
15235 if (GET_CODE (op0) == UNSPEC
15236 && (XINT (op0, 1) == UNSPEC_DTPOFF
15237 || XINT (op0, 1) == UNSPEC_NTPOFF)
15238 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
15239 return true;
15240 if (INTVAL (op1) >= 16*1024*1024
15241 || INTVAL (op1) < -16*1024*1024)
15242 break;
15243 if (GET_CODE (op0) == LABEL_REF)
15244 return true;
15245 if (GET_CODE (op0) == CONST
15246 && GET_CODE (XEXP (op0, 0)) == UNSPEC
15247 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
15248 return true;
15249 if (GET_CODE (op0) == UNSPEC
15250 && XINT (op0, 1) == UNSPEC_PCREL)
15251 return true;
15252 if (GET_CODE (op0) != SYMBOL_REF)
15253 break;
15254 /* FALLTHRU */
15255
15256 case SYMBOL_REF:
15257 /* TLS references should always be enclosed in UNSPEC.
15258 A dllimported symbol always needs to be resolved. */
15259 if (SYMBOL_REF_TLS_MODEL (op0)
15260 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
15261 return false;
15262
15263 if (TARGET_PECOFF)
15264 {
15265 if (is_imported_p (op0))
15266 return true;
15267
15268 if (SYMBOL_REF_FAR_ADDR_P (op0)
15269 || !SYMBOL_REF_LOCAL_P (op0))
15270 break;
15271
15272 /* Function symbols need to be resolved only for
15273 the large model.
15274 For the small model we don't need to resolve anything
15275 here. */
15276 if ((ix86_cmodel != CM_LARGE_PIC
15277 && SYMBOL_REF_FUNCTION_P (op0))
15278 || ix86_cmodel == CM_SMALL_PIC)
15279 return true;
15280 /* Non-external symbols don't need to be resolved for
15281 the large and medium models. */
15282 if ((ix86_cmodel == CM_LARGE_PIC
15283 || ix86_cmodel == CM_MEDIUM_PIC)
15284 && !SYMBOL_REF_EXTERNAL_P (op0))
15285 return true;
15286 }
15287 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
15288 && (SYMBOL_REF_LOCAL_P (op0)
15289 || (HAVE_LD_PIE_COPYRELOC
15290 && flag_pie
15291 && !SYMBOL_REF_WEAK (op0)
15292 && !SYMBOL_REF_FUNCTION_P (op0)))
15293 && ix86_cmodel != CM_LARGE_PIC)
15294 return true;
15295 break;
15296
15297 default:
15298 break;
15299 }
15300 }
15301 if (GET_CODE (disp) != CONST)
15302 return false;
15303 disp = XEXP (disp, 0);
15304
15305 if (TARGET_64BIT)
15306 {
15307 /* It is unsafe to allow PLUS expressions; this limits the allowed
15308 distance of GOT references. We should not need these anyway. */
15309 if (GET_CODE (disp) != UNSPEC
15310 || (XINT (disp, 1) != UNSPEC_GOTPCREL
15311 && XINT (disp, 1) != UNSPEC_GOTOFF
15312 && XINT (disp, 1) != UNSPEC_PCREL
15313 && XINT (disp, 1) != UNSPEC_PLTOFF))
15314 return false;
15315
15316 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
15317 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
15318 return false;
15319 return true;
15320 }
15321
15322 saw_plus = false;
15323 if (GET_CODE (disp) == PLUS)
15324 {
15325 if (!CONST_INT_P (XEXP (disp, 1)))
15326 return false;
15327 disp = XEXP (disp, 0);
15328 saw_plus = true;
15329 }
15330
15331 if (TARGET_MACHO && darwin_local_data_pic (disp))
15332 return true;
15333
15334 if (GET_CODE (disp) != UNSPEC)
15335 return false;
15336
15337 switch (XINT (disp, 1))
15338 {
15339 case UNSPEC_GOT:
15340 if (saw_plus)
15341 return false;
15342 /* We need to check for both symbols and labels because VxWorks loads
15343 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
15344 details. */
15345 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15346 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15347 case UNSPEC_GOTOFF:
15348 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15349 While the ABI also specifies a 32bit relocation, we don't produce
15350 it in the small PIC model at all. */
15351 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15352 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15353 && !TARGET_64BIT)
15354 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15355 return false;
15356 case UNSPEC_GOTTPOFF:
15357 case UNSPEC_GOTNTPOFF:
15358 case UNSPEC_INDNTPOFF:
15359 if (saw_plus)
15360 return false;
15361 disp = XVECEXP (disp, 0, 0);
15362 return (GET_CODE (disp) == SYMBOL_REF
15363 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15364 case UNSPEC_NTPOFF:
15365 disp = XVECEXP (disp, 0, 0);
15366 return (GET_CODE (disp) == SYMBOL_REF
15367 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
15368 case UNSPEC_DTPOFF:
15369 disp = XVECEXP (disp, 0, 0);
15370 return (GET_CODE (disp) == SYMBOL_REF
15371 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
15372 }
15373
15374 return false;
15375 }
15376
15377 /* Determine if op is suitable RTX for an address register.
15378 Return naked register if a register or a register subreg is
15379 found, otherwise return NULL_RTX. */
15380
15381 static rtx
15382 ix86_validate_address_register (rtx op)
15383 {
15384 machine_mode mode = GET_MODE (op);
15385
15386 /* Only SImode or DImode registers can form the address. */
15387 if (mode != SImode && mode != DImode)
15388 return NULL_RTX;
15389
15390 if (REG_P (op))
15391 return op;
15392 else if (SUBREG_P (op))
15393 {
15394 rtx reg = SUBREG_REG (op);
15395
15396 if (!REG_P (reg))
15397 return NULL_RTX;
15398
15399 mode = GET_MODE (reg);
15400
15401 /* Don't allow SUBREGs that span more than a word. It can
15402 lead to spill failures when the register is one word out
15403 of a two word structure. */
15404 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15405 return NULL_RTX;
15406
15407 /* Allow only SUBREGs of non-eliminable hard registers. */
15408 if (register_no_elim_operand (reg, mode))
15409 return reg;
15410 }
15411
15412 /* Op is not a register. */
15413 return NULL_RTX;
15414 }
15415
15416 /* Recognizes RTL expressions that are valid memory addresses for an
15417 instruction. The MODE argument is the machine mode for the MEM
15418 expression that wants to use this address.
15419
15420 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
15421 convert common non-canonical forms to canonical form so that they will
15422 be recognized. */
15423
15424 static bool
15425 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
15426 {
15427 struct ix86_address parts;
15428 rtx base, index, disp;
15429 HOST_WIDE_INT scale;
15430 addr_space_t seg;
15431
15432 if (ix86_decompose_address (addr, &parts) <= 0)
15433 /* Decomposition failed. */
15434 return false;
15435
15436 base = parts.base;
15437 index = parts.index;
15438 disp = parts.disp;
15439 scale = parts.scale;
15440 seg = parts.seg;
15441
15442 /* Validate base register. */
15443 if (base)
15444 {
15445 rtx reg = ix86_validate_address_register (base);
15446
15447 if (reg == NULL_RTX)
15448 return false;
15449
15450 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
15451 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
15452 /* Base is not valid. */
15453 return false;
15454 }
15455
15456 /* Validate index register. */
15457 if (index)
15458 {
15459 rtx reg = ix86_validate_address_register (index);
15460
15461 if (reg == NULL_RTX)
15462 return false;
15463
15464 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
15465 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
15466 /* Index is not valid. */
15467 return false;
15468 }
15469
15470 /* Index and base should have the same mode. */
15471 if (base && index
15472 && GET_MODE (base) != GET_MODE (index))
15473 return false;
15474
15475 /* Address override works only on the (%reg) part of %fs:(%reg). */
15476 if (seg != ADDR_SPACE_GENERIC
15477 && ((base && GET_MODE (base) != word_mode)
15478 || (index && GET_MODE (index) != word_mode)))
15479 return false;
15480
15481 /* Validate scale factor. */
15482 if (scale != 1)
15483 {
15484 if (!index)
15485 /* Scale without index. */
15486 return false;
15487
15488 if (scale != 2 && scale != 4 && scale != 8)
15489 /* Scale is not a valid multiplier. */
15490 return false;
15491 }
15492
15493 /* Validate displacement. */
15494 if (disp)
15495 {
15496 if (GET_CODE (disp) == CONST
15497 && GET_CODE (XEXP (disp, 0)) == UNSPEC
15498 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
15499 switch (XINT (XEXP (disp, 0), 1))
15500 {
15501 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
15502 when used. While the ABI also specifies 32bit relocations, we
15503 don't produce them at all and use IP-relative addressing instead.
15504 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
15505 should be loaded via the GOT. */
15506 case UNSPEC_GOT:
15507 if (!TARGET_64BIT
15508 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15509 goto is_legitimate_pic;
15510 /* FALLTHRU */
15511 case UNSPEC_GOTOFF:
15512 gcc_assert (flag_pic);
15513 if (!TARGET_64BIT)
15514 goto is_legitimate_pic;
15515
15516 /* 64bit address unspec. */
15517 return false;
15518
15519 case UNSPEC_GOTPCREL:
15520 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15521 goto is_legitimate_pic;
15522 /* FALLTHRU */
15523 case UNSPEC_PCREL:
15524 gcc_assert (flag_pic);
15525 goto is_legitimate_pic;
15526
15527 case UNSPEC_GOTTPOFF:
15528 case UNSPEC_GOTNTPOFF:
15529 case UNSPEC_INDNTPOFF:
15530 case UNSPEC_NTPOFF:
15531 case UNSPEC_DTPOFF:
15532 break;
15533
15534 default:
15535 /* Invalid address unspec. */
15536 return false;
15537 }
15538
15539 else if (SYMBOLIC_CONST (disp)
15540 && (flag_pic
15541 || (TARGET_MACHO
15542 #if TARGET_MACHO
15543 && MACHOPIC_INDIRECT
15544 && !machopic_operand_p (disp)
15545 #endif
15546 )))
15547 {
15548
15549 is_legitimate_pic:
15550 if (TARGET_64BIT && (index || base))
15551 {
15552 /* foo@dtpoff(%rX) is ok. */
15553 if (GET_CODE (disp) != CONST
15554 || GET_CODE (XEXP (disp, 0)) != PLUS
15555 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
15556 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
15557 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
15558 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
15559 /* Non-constant pic memory reference. */
15560 return false;
15561 }
15562 else if ((!TARGET_MACHO || flag_pic)
15563 && ! legitimate_pic_address_disp_p (disp))
15564 /* Displacement is an invalid pic construct. */
15565 return false;
15566 #if TARGET_MACHO
15567 else if (MACHO_DYNAMIC_NO_PIC_P
15568 && !ix86_legitimate_constant_p (Pmode, disp))
15569 /* displacement must be referenced via non_lazy_pointer */
15570 return false;
15571 #endif
15572
15573 /* This code used to verify that a symbolic pic displacement
15574 includes the pic_offset_table_rtx register.
15575
15576 While this is a good idea, unfortunately these constructs may
15577 be created by the "adds using lea" optimization for incorrect
15578 code like:
15579
15580 int a;
15581 int foo(int i)
15582 {
15583 return *(&a+i);
15584 }
15585
15586 This code is nonsensical, but results in addressing the
15587 GOT table with a pic_offset_table_rtx base. We can't
15588 just refuse it easily, since it gets matched by the
15589 "addsi3" pattern, which later gets split to lea when the
15590 output register differs from the input. While this
15591 could be handled by a separate addsi pattern for this case
15592 that never results in lea, disabling this test seems to be
15593 the easier and correct fix for the crash. */
15594 }
15595 else if (GET_CODE (disp) != LABEL_REF
15596 && !CONST_INT_P (disp)
15597 && (GET_CODE (disp) != CONST
15598 || !ix86_legitimate_constant_p (Pmode, disp))
15599 && (GET_CODE (disp) != SYMBOL_REF
15600 || !ix86_legitimate_constant_p (Pmode, disp)))
15601 /* Displacement is not constant. */
15602 return false;
15603 else if (TARGET_64BIT
15604 && !x86_64_immediate_operand (disp, VOIDmode))
15605 /* Displacement is out of range. */
15606 return false;
15607 /* In x32 mode, constant addresses are sign extended to 64bit, so
15608 we have to prevent addresses from 0x80000000 to 0xffffffff. */
15609 else if (TARGET_X32 && !(index || base)
15610 && CONST_INT_P (disp)
15611 && val_signbit_known_set_p (SImode, INTVAL (disp)))
15612 return false;
15613 }
15614
15615 /* Everything looks valid. */
15616 return true;
15617 }
15618
15619 /* Determine if a given RTX is a valid constant address. */
15620
15621 bool
15622 constant_address_p (rtx x)
15623 {
15624 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
15625 }
15626 \f
15627 /* Return a unique alias set for the GOT. */
15628
15629 static alias_set_type
15630 ix86_GOT_alias_set (void)
15631 {
15632 static alias_set_type set = -1;
15633 if (set == -1)
15634 set = new_alias_set ();
15635 return set;
15636 }
15637
15638 /* Return a legitimate reference for ORIG (an address) using the
15639 register REG. If REG is 0, a new pseudo is generated.
15640
15641 There are two types of references that must be handled:
15642
15643 1. Global data references must load the address from the GOT, via
15644 the PIC reg. An insn is emitted to do this load, and the reg is
15645 returned.
15646
15647 2. Static data references, constant pool addresses, and code labels
15648 compute the address as an offset from the GOT, whose base is in
15649 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
15650 differentiate them from global data objects. The returned
15651 address is the PIC reg + an unspec constant.
15652
15653 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
15654 reg also appears in the address. */
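/* (Illustrative sketch added for clarity; the assembly is an assumption
   about typical 32-bit output, not taken from this file.)  Case 2 above
   typically ends up as
       leal  sym@GOTOFF(%ebx), %eax    # local/static data: PIC reg + offset
   while case 1 becomes a load through the GOT slot:
       movl  sym@GOT(%ebx), %eax       # global data: address loaded from GOT  */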
15655
15656 static rtx
15657 legitimize_pic_address (rtx orig, rtx reg)
15658 {
15659 rtx addr = orig;
15660 rtx new_rtx = orig;
15661
15662 #if TARGET_MACHO
15663 if (TARGET_MACHO && !TARGET_64BIT)
15664 {
15665 if (reg == 0)
15666 reg = gen_reg_rtx (Pmode);
15667 /* Use the generic Mach-O PIC machinery. */
15668 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
15669 }
15670 #endif
15671
15672 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15673 {
15674 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15675 if (tmp)
15676 return tmp;
15677 }
15678
15679 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
15680 new_rtx = addr;
15681 else if ((!TARGET_64BIT
15682 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
15683 && !TARGET_PECOFF
15684 && gotoff_operand (addr, Pmode))
15685 {
15686 /* This symbol may be referenced via a displacement
15687 from the PIC base address (@GOTOFF). */
15688 if (GET_CODE (addr) == CONST)
15689 addr = XEXP (addr, 0);
15690
15691 if (GET_CODE (addr) == PLUS)
15692 {
15693 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15694 UNSPEC_GOTOFF);
15695 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15696 }
15697 else
15698 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15699
15700 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15701
15702 if (TARGET_64BIT)
15703 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15704
15705 if (reg != 0)
15706 {
15707 gcc_assert (REG_P (reg));
15708 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
15709 new_rtx, reg, 1, OPTAB_DIRECT);
15710 }
15711 else
15712 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15713 }
15714 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15715 /* We can't use @GOTOFF for text labels
15716 on VxWorks, see gotoff_operand. */
15717 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15718 {
15719 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15720 if (tmp)
15721 return tmp;
15722
15723 /* For x64 PE-COFF there is no GOT table,
15724 so we use address directly. */
15725 if (TARGET_64BIT && TARGET_PECOFF)
15726 {
15727 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15728 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15729 }
15730 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15731 {
15732 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
15733 UNSPEC_GOTPCREL);
15734 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15735 new_rtx = gen_const_mem (Pmode, new_rtx);
15736 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15737 }
15738 else
15739 {
15740 /* This symbol must be referenced via a load
15741 from the Global Offset Table (@GOT). */
15742 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15743 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15744 if (TARGET_64BIT)
15745 new_rtx = force_reg (Pmode, new_rtx);
15746 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15747 new_rtx = gen_const_mem (Pmode, new_rtx);
15748 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15749 }
15750
15751 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15752 }
15753 else
15754 {
15755 if (CONST_INT_P (addr)
15756 && !x86_64_immediate_operand (addr, VOIDmode))
15757 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
15758 else if (GET_CODE (addr) == CONST)
15759 {
15760 addr = XEXP (addr, 0);
15761
15762 /* We must match stuff we generate before. Assume the only
15763 unspecs that can get here are ours. Not that we could do
15764 anything with them anyway.... */
15765 if (GET_CODE (addr) == UNSPEC
15766 || (GET_CODE (addr) == PLUS
15767 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15768 return orig;
15769 gcc_assert (GET_CODE (addr) == PLUS);
15770 }
15771
15772 if (GET_CODE (addr) == PLUS)
15773 {
15774 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15775
15776 /* Check first to see if this is a constant
15777 offset from a @GOTOFF symbol reference. */
15778 if (!TARGET_PECOFF
15779 && gotoff_operand (op0, Pmode)
15780 && CONST_INT_P (op1))
15781 {
15782 if (!TARGET_64BIT)
15783 {
15784 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15785 UNSPEC_GOTOFF);
15786 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15787 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15788
15789 if (reg != 0)
15790 {
15791 gcc_assert (REG_P (reg));
15792 new_rtx = expand_simple_binop (Pmode, PLUS,
15793 pic_offset_table_rtx,
15794 new_rtx, reg, 1,
15795 OPTAB_DIRECT);
15796 }
15797 else
15798 new_rtx
15799 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15800 }
15801 else
15802 {
15803 if (INTVAL (op1) < -16*1024*1024
15804 || INTVAL (op1) >= 16*1024*1024)
15805 {
15806 if (!x86_64_immediate_operand (op1, Pmode))
15807 op1 = force_reg (Pmode, op1);
15808
15809 new_rtx
15810 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
15811 }
15812 }
15813 }
15814 else
15815 {
15816 rtx base = legitimize_pic_address (op0, reg);
15817 machine_mode mode = GET_MODE (base);
15818 new_rtx
15819 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15820
15821 if (CONST_INT_P (new_rtx))
15822 {
15823 if (INTVAL (new_rtx) < -16*1024*1024
15824 || INTVAL (new_rtx) >= 16*1024*1024)
15825 {
15826 if (!x86_64_immediate_operand (new_rtx, mode))
15827 new_rtx = force_reg (mode, new_rtx);
15828
15829 new_rtx
15830 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15831 }
15832 else
15833 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15834 }
15835 else
15836 {
15837 /* For %rip addressing, we have to use
15838 just disp32, with neither base nor index. */
15839 if (TARGET_64BIT
15840 && (GET_CODE (base) == SYMBOL_REF
15841 || GET_CODE (base) == LABEL_REF))
15842 base = force_reg (mode, base);
15843 if (GET_CODE (new_rtx) == PLUS
15844 && CONSTANT_P (XEXP (new_rtx, 1)))
15845 {
15846 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15847 new_rtx = XEXP (new_rtx, 1);
15848 }
15849 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15850 }
15851 }
15852 }
15853 }
15854 return new_rtx;
15855 }
15856 \f
15857 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15858
15859 static rtx
15860 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15861 {
15862 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
15863
15864 if (GET_MODE (tp) != tp_mode)
15865 {
15866 gcc_assert (GET_MODE (tp) == SImode);
15867 gcc_assert (tp_mode == DImode);
15868
15869 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15870 }
15871
15872 if (to_reg)
15873 tp = copy_to_mode_reg (tp_mode, tp);
15874
15875 return tp;
15876 }
15877
15878 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15879
15880 static GTY(()) rtx ix86_tls_symbol;
15881
15882 static rtx
15883 ix86_tls_get_addr (void)
15884 {
15885 if (!ix86_tls_symbol)
15886 {
15887 const char *sym
15888 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15889 ? "___tls_get_addr" : "__tls_get_addr");
15890
15891 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
15892 }
15893
15894 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15895 {
15896 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15897 UNSPEC_PLTOFF);
15898 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15899 gen_rtx_CONST (Pmode, unspec));
15900 }
15901
15902 return ix86_tls_symbol;
15903 }
15904
15905 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15906
15907 static GTY(()) rtx ix86_tls_module_base_symbol;
15908
15909 rtx
15910 ix86_tls_module_base (void)
15911 {
15912 if (!ix86_tls_module_base_symbol)
15913 {
15914 ix86_tls_module_base_symbol
15915 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
15916
15917 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15918 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15919 }
15920
15921 return ix86_tls_module_base_symbol;
15922 }
15923
15924 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15925 false if we expect this to be used for a memory address and true if
15926 we expect to load the address into a register. */
15927
15928 static rtx
15929 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15930 {
15931 rtx dest, base, off;
15932 rtx pic = NULL_RTX, tp = NULL_RTX;
15933 machine_mode tp_mode = Pmode;
15934 int type;
15935
15936 /* Fall back to global dynamic model if tool chain cannot support local
15937 dynamic. */
15938 if (TARGET_SUN_TLS && !TARGET_64BIT
15939 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15940 && model == TLS_MODEL_LOCAL_DYNAMIC)
15941 model = TLS_MODEL_GLOBAL_DYNAMIC;
15942
15943 switch (model)
15944 {
15945 case TLS_MODEL_GLOBAL_DYNAMIC:
15946 dest = gen_reg_rtx (Pmode);
15947
15948 if (!TARGET_64BIT)
15949 {
15950 if (flag_pic && !TARGET_PECOFF)
15951 pic = pic_offset_table_rtx;
15952 else
15953 {
15954 pic = gen_reg_rtx (Pmode);
15955 emit_insn (gen_set_got (pic));
15956 }
15957 }
15958
15959 if (TARGET_GNU2_TLS)
15960 {
15961 if (TARGET_64BIT)
15962 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15963 else
15964 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15965
15966 tp = get_thread_pointer (Pmode, true);
15967 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15968
15969 if (GET_MODE (x) != Pmode)
15970 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15971
15972 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15973 }
15974 else
15975 {
15976 rtx caddr = ix86_tls_get_addr ();
15977
15978 if (TARGET_64BIT)
15979 {
15980 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15981 rtx_insn *insns;
15982
15983 start_sequence ();
15984 emit_call_insn
15985 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15986 insns = get_insns ();
15987 end_sequence ();
15988
15989 if (GET_MODE (x) != Pmode)
15990 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15991
15992 RTL_CONST_CALL_P (insns) = 1;
15993 emit_libcall_block (insns, dest, rax, x);
15994 }
15995 else
15996 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15997 }
15998 break;
15999
16000 case TLS_MODEL_LOCAL_DYNAMIC:
16001 base = gen_reg_rtx (Pmode);
16002
16003 if (!TARGET_64BIT)
16004 {
16005 if (flag_pic)
16006 pic = pic_offset_table_rtx;
16007 else
16008 {
16009 pic = gen_reg_rtx (Pmode);
16010 emit_insn (gen_set_got (pic));
16011 }
16012 }
16013
16014 if (TARGET_GNU2_TLS)
16015 {
16016 rtx tmp = ix86_tls_module_base ();
16017
16018 if (TARGET_64BIT)
16019 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
16020 else
16021 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
16022
16023 tp = get_thread_pointer (Pmode, true);
16024 set_unique_reg_note (get_last_insn (), REG_EQUAL,
16025 gen_rtx_MINUS (Pmode, tmp, tp));
16026 }
16027 else
16028 {
16029 rtx caddr = ix86_tls_get_addr ();
16030
16031 if (TARGET_64BIT)
16032 {
16033 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16034 rtx_insn *insns;
16035 rtx eqv;
16036
16037 start_sequence ();
16038 emit_call_insn
16039 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
16040 insns = get_insns ();
16041 end_sequence ();
16042
16043 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
16044 share the LD_BASE result with other LD model accesses. */
16045 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
16046 UNSPEC_TLS_LD_BASE);
16047
16048 RTL_CONST_CALL_P (insns) = 1;
16049 emit_libcall_block (insns, base, rax, eqv);
16050 }
16051 else
16052 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
16053 }
16054
16055 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
16056 off = gen_rtx_CONST (Pmode, off);
16057
16058 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
16059
16060 if (TARGET_GNU2_TLS)
16061 {
16062 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
16063
16064 if (GET_MODE (x) != Pmode)
16065 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16066
16067 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16068 }
16069 break;
16070
16071 case TLS_MODEL_INITIAL_EXEC:
16072 if (TARGET_64BIT)
16073 {
16074 if (TARGET_SUN_TLS && !TARGET_X32)
16075 {
16076 /* The Sun linker took the AMD64 TLS spec literally
16077 and can only handle %rax as destination of the
16078 initial executable code sequence. */
16079
16080 dest = gen_reg_rtx (DImode);
16081 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
16082 return dest;
16083 }
16084
16085 /* Generate DImode references to avoid %fs:(%reg32)
16086 problems and linker IE->LE relaxation bug. */
16087 tp_mode = DImode;
16088 pic = NULL;
16089 type = UNSPEC_GOTNTPOFF;
16090 }
16091 else if (flag_pic)
16092 {
16093 pic = pic_offset_table_rtx;
16094 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
16095 }
16096 else if (!TARGET_ANY_GNU_TLS)
16097 {
16098 pic = gen_reg_rtx (Pmode);
16099 emit_insn (gen_set_got (pic));
16100 type = UNSPEC_GOTTPOFF;
16101 }
16102 else
16103 {
16104 pic = NULL;
16105 type = UNSPEC_INDNTPOFF;
16106 }
16107
16108 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
16109 off = gen_rtx_CONST (tp_mode, off);
16110 if (pic)
16111 off = gen_rtx_PLUS (tp_mode, pic, off);
16112 off = gen_const_mem (tp_mode, off);
16113 set_mem_alias_set (off, ix86_GOT_alias_set ());
16114
16115 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16116 {
16117 base = get_thread_pointer (tp_mode,
16118 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16119 off = force_reg (tp_mode, off);
16120 dest = gen_rtx_PLUS (tp_mode, base, off);
16121 if (tp_mode != Pmode)
16122 dest = convert_to_mode (Pmode, dest, 1);
16123 }
16124 else
16125 {
16126 base = get_thread_pointer (Pmode, true);
16127 dest = gen_reg_rtx (Pmode);
16128 emit_insn (ix86_gen_sub3 (dest, base, off));
16129 }
16130 break;
16131
16132 case TLS_MODEL_LOCAL_EXEC:
16133 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
16134 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16135 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
16136 off = gen_rtx_CONST (Pmode, off);
16137
16138 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16139 {
16140 base = get_thread_pointer (Pmode,
16141 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16142 return gen_rtx_PLUS (Pmode, base, off);
16143 }
16144 else
16145 {
16146 base = get_thread_pointer (Pmode, true);
16147 dest = gen_reg_rtx (Pmode);
16148 emit_insn (ix86_gen_sub3 (dest, base, off));
16149 }
16150 break;
16151
16152 default:
16153 gcc_unreachable ();
16154 }
16155
16156 return dest;
16157 }
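
/* Rough, illustrative shape of the code the models above expand to on an
   x86-64 ELF target (the real sequences depend on -fpic, -mtls-dialect,
   assembler support and the other conditions checked above):

     global dynamic:  leaq  x@tlsgd(%rip), %rdi
                      call  __tls_get_addr@PLT      -- result in %rax
     local dynamic:   leaq  x@tlsld(%rip), %rdi
                      call  __tls_get_addr@PLT
                      leaq  x@dtpoff(%rax), %reg
     initial exec:    movq  x@gottpoff(%rip), %reg
                      movq  %fs:(%reg), ...
     local exec:      movq  %fs:x@tpoff, ...

   32-bit code uses %gs and, with GNU TLS, ___tls_get_addr, as selected by
   ix86_tls_get_addr above.  */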
16158
16159 /* Return true if OP refers to a TLS address. */
16160 bool
16161 ix86_tls_address_pattern_p (rtx op)
16162 {
16163 subrtx_var_iterator::array_type array;
16164 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
16165 {
16166 rtx op = *iter;
16167 if (MEM_P (op))
16168 {
16169 rtx *x = &XEXP (op, 0);
16170 while (GET_CODE (*x) == PLUS)
16171 {
16172 int i;
16173 for (i = 0; i < 2; i++)
16174 {
16175 rtx u = XEXP (*x, i);
16176 if (GET_CODE (u) == ZERO_EXTEND)
16177 u = XEXP (u, 0);
16178 if (GET_CODE (u) == UNSPEC
16179 && XINT (u, 1) == UNSPEC_TP)
16180 return true;
16181 }
16182 x = &XEXP (*x, 0);
16183 }
16184
16185 iter.skip_subrtxes ();
16186 }
16187 }
16188
16189 return false;
16190 }
16191
16192 /* Rewrite *LOC so that it refers to the default TLS address space. */
16193 void
16194 ix86_rewrite_tls_address_1 (rtx *loc)
16195 {
16196 subrtx_ptr_iterator::array_type array;
16197 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
16198 {
16199 rtx *loc = *iter;
16200 if (MEM_P (*loc))
16201 {
16202 rtx addr = XEXP (*loc, 0);
16203 rtx *x = &addr;
16204 while (GET_CODE (*x) == PLUS)
16205 {
16206 int i;
16207 for (i = 0; i < 2; i++)
16208 {
16209 rtx u = XEXP (*x, i);
16210 if (GET_CODE (u) == ZERO_EXTEND)
16211 u = XEXP (u, 0);
16212 if (GET_CODE (u) == UNSPEC
16213 && XINT (u, 1) == UNSPEC_TP)
16214 {
16215 addr_space_t as = DEFAULT_TLS_SEG_REG;
16216
16217 *x = XEXP (*x, 1 - i);
16218
16219 *loc = replace_equiv_address_nv (*loc, addr, true);
16220 set_mem_addr_space (*loc, as);
16221 return;
16222 }
16223 }
16224 x = &XEXP (*x, 0);
16225 }
16226
16227 iter.skip_subrtxes ();
16228 }
16229 }
16230 }
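
/* For example, an address of the form
     (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP) (reg:DI R))
   loses the UNSPEC_TP term and the containing MEM is given the
   DEFAULT_TLS_SEG_REG address space, so it later prints as a
   %fs:/%gs:-prefixed reference to just (reg:DI R).  */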
16231
16232 /* Rewrite an instruction pattern involving a TLS address
16233    so that it refers to the default TLS address space. */
16234 rtx
16235 ix86_rewrite_tls_address (rtx pattern)
16236 {
16237 pattern = copy_insn (pattern);
16238 ix86_rewrite_tls_address_1 (&pattern);
16239 return pattern;
16240 }
16241
16242 /* Create or return the unique __imp_DECL dllimport symbol corresponding
16243 to symbol DECL if BEIMPORT is true. Otherwise create or return the
16244 unique refptr-DECL symbol corresponding to symbol DECL. */
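
/* For example, a dllimport reference to a symbol "foo" resolves to a load
   from the import-table slot named "*__imp_foo" (or "*__imp__foo" when a
   user label prefix is in use), while the refptr case builds "*refptr.foo"
   or "*.refptr.foo"; see the prefix selection in get_dllimport_decl.  */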
16245
16246 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
16247 {
16248 static inline hashval_t hash (tree_map *m) { return m->hash; }
16249 static inline bool
16250 equal (tree_map *a, tree_map *b)
16251 {
16252 return a->base.from == b->base.from;
16253 }
16254
16255 static int
16256 keep_cache_entry (tree_map *&m)
16257 {
16258 return ggc_marked_p (m->base.from);
16259 }
16260 };
16261
16262 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
16263
16264 static tree
16265 get_dllimport_decl (tree decl, bool beimport)
16266 {
16267 struct tree_map *h, in;
16268 const char *name;
16269 const char *prefix;
16270 size_t namelen, prefixlen;
16271 char *imp_name;
16272 tree to;
16273 rtx rtl;
16274
16275 if (!dllimport_map)
16276 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
16277
16278 in.hash = htab_hash_pointer (decl);
16279 in.base.from = decl;
16280 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
16281 h = *loc;
16282 if (h)
16283 return h->to;
16284
16285 *loc = h = ggc_alloc<tree_map> ();
16286 h->hash = in.hash;
16287 h->base.from = decl;
16288 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
16289 VAR_DECL, NULL, ptr_type_node);
16290 DECL_ARTIFICIAL (to) = 1;
16291 DECL_IGNORED_P (to) = 1;
16292 DECL_EXTERNAL (to) = 1;
16293 TREE_READONLY (to) = 1;
16294
16295 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16296 name = targetm.strip_name_encoding (name);
16297 if (beimport)
16298 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
16299 ? "*__imp_" : "*__imp__";
16300 else
16301 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
16302 namelen = strlen (name);
16303 prefixlen = strlen (prefix);
16304 imp_name = (char *) alloca (namelen + prefixlen + 1);
16305 memcpy (imp_name, prefix, prefixlen);
16306 memcpy (imp_name + prefixlen, name, namelen + 1);
16307
16308 name = ggc_alloc_string (imp_name, namelen + prefixlen);
16309 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
16310 SET_SYMBOL_REF_DECL (rtl, to);
16311 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
16312 if (!beimport)
16313 {
16314 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
16315 #ifdef SUB_TARGET_RECORD_STUB
16316 SUB_TARGET_RECORD_STUB (name);
16317 #endif
16318 }
16319
16320 rtl = gen_const_mem (Pmode, rtl);
16321 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
16322
16323 SET_DECL_RTL (to, rtl);
16324 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
16325
16326 return to;
16327 }
16328
16329 /* Expand SYMBOL into its corresponding far-address symbol.
16330 WANT_REG is true if we require the result be a register. */
16331
16332 static rtx
16333 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
16334 {
16335 tree imp_decl;
16336 rtx x;
16337
16338 gcc_assert (SYMBOL_REF_DECL (symbol));
16339 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16340
16341 x = DECL_RTL (imp_decl);
16342 if (want_reg)
16343 x = force_reg (Pmode, x);
16344 return x;
16345 }
16346
16347 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
16348 true if we require the result be a register. */
16349
16350 static rtx
16351 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16352 {
16353 tree imp_decl;
16354 rtx x;
16355
16356 gcc_assert (SYMBOL_REF_DECL (symbol));
16357 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16358
16359 x = DECL_RTL (imp_decl);
16360 if (want_reg)
16361 x = force_reg (Pmode, x);
16362 return x;
16363 }
16364
16365 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
16366 is true if we require the result to be a register. */
16367
16368 static rtx
16369 legitimize_pe_coff_symbol (rtx addr, bool inreg)
16370 {
16371 if (!TARGET_PECOFF)
16372 return NULL_RTX;
16373
16374 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16375 {
16376 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
16377 return legitimize_dllimport_symbol (addr, inreg);
16378 if (GET_CODE (addr) == CONST
16379 && GET_CODE (XEXP (addr, 0)) == PLUS
16380 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16381 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
16382 {
16383 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
16384 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16385 }
16386 }
16387
16388 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
16389 return NULL_RTX;
16390 if (GET_CODE (addr) == SYMBOL_REF
16391 && !is_imported_p (addr)
16392 && SYMBOL_REF_EXTERNAL_P (addr)
16393 && SYMBOL_REF_DECL (addr))
16394 return legitimize_pe_coff_extern_decl (addr, inreg);
16395
16396 if (GET_CODE (addr) == CONST
16397 && GET_CODE (XEXP (addr, 0)) == PLUS
16398 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16399 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
16400 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
16401 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
16402 {
16403 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
16404 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16405 }
16406 return NULL_RTX;
16407 }
16408
16409 /* Try machine-dependent ways of modifying an illegitimate address
16410 to be legitimate. If we find one, return the new, valid address.
16411 This macro is used in only one place: `memory_address' in explow.c.
16412
16413 OLDX is the address as it was before break_out_memory_refs was called.
16414 In some cases it is useful to look at this to decide what needs to be done.
16415
16416 It is always safe for this macro to do nothing. It exists to recognize
16417 opportunities to optimize the output.
16418
16419 For the 80386, we handle X+REG by loading X into a register R and
16420 using R+REG. R will go in a general reg and indexing will be used.
16421 However, if REG is a broken-out memory address or multiplication,
16422 nothing needs to be done because REG can certainly go in a general reg.
16423
16424 When -fpic is used, special handling is needed for symbolic references.
16425 See comments by legitimize_pic_address in i386.c for details. */
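
/* For instance, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten
   below as (plus (mult (reg) (const_int 4)) (reg)), which matches the
   machine's base + index*scale + displacement addressing form.  */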
16426
16427 static rtx
16428 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
16429 {
16430 bool changed = false;
16431 unsigned log;
16432
16433 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
16434 if (log)
16435 return legitimize_tls_address (x, (enum tls_model) log, false);
16436 if (GET_CODE (x) == CONST
16437 && GET_CODE (XEXP (x, 0)) == PLUS
16438 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
16439 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
16440 {
16441 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
16442 (enum tls_model) log, false);
16443 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
16444 }
16445
16446 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16447 {
16448 rtx tmp = legitimize_pe_coff_symbol (x, true);
16449 if (tmp)
16450 return tmp;
16451 }
16452
16453 if (flag_pic && SYMBOLIC_CONST (x))
16454 return legitimize_pic_address (x, 0);
16455
16456 #if TARGET_MACHO
16457 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
16458 return machopic_indirect_data_reference (x, 0);
16459 #endif
16460
16461 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
16462 if (GET_CODE (x) == ASHIFT
16463 && CONST_INT_P (XEXP (x, 1))
16464 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
16465 {
16466 changed = true;
16467 log = INTVAL (XEXP (x, 1));
16468 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
16469 GEN_INT (1 << log));
16470 }
16471
16472 if (GET_CODE (x) == PLUS)
16473 {
16474 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16475
16476 if (GET_CODE (XEXP (x, 0)) == ASHIFT
16477 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16478 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
16479 {
16480 changed = true;
16481 log = INTVAL (XEXP (XEXP (x, 0), 1));
16482 XEXP (x, 0) = gen_rtx_MULT (Pmode,
16483 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
16484 GEN_INT (1 << log));
16485 }
16486
16487 if (GET_CODE (XEXP (x, 1)) == ASHIFT
16488 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
16489 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
16490 {
16491 changed = true;
16492 log = INTVAL (XEXP (XEXP (x, 1), 1));
16493 XEXP (x, 1) = gen_rtx_MULT (Pmode,
16494 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
16495 GEN_INT (1 << log));
16496 }
16497
16498 /* Put multiply first if it isn't already. */
16499 if (GET_CODE (XEXP (x, 1)) == MULT)
16500 {
16501 std::swap (XEXP (x, 0), XEXP (x, 1));
16502 changed = true;
16503 }
16504
16505 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
16506 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
16507 created by virtual register instantiation, register elimination, and
16508 similar optimizations. */
16509 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
16510 {
16511 changed = true;
16512 x = gen_rtx_PLUS (Pmode,
16513 gen_rtx_PLUS (Pmode, XEXP (x, 0),
16514 XEXP (XEXP (x, 1), 0)),
16515 XEXP (XEXP (x, 1), 1));
16516 }
16517
16518 /* Canonicalize
16519 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
16520 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
16521 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
16522 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16523 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
16524 && CONSTANT_P (XEXP (x, 1)))
16525 {
16526 rtx constant;
16527 rtx other = NULL_RTX;
16528
16529 if (CONST_INT_P (XEXP (x, 1)))
16530 {
16531 constant = XEXP (x, 1);
16532 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
16533 }
16534 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
16535 {
16536 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
16537 other = XEXP (x, 1);
16538 }
16539 else
16540 constant = 0;
16541
16542 if (constant)
16543 {
16544 changed = true;
16545 x = gen_rtx_PLUS (Pmode,
16546 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
16547 XEXP (XEXP (XEXP (x, 0), 1), 0)),
16548 plus_constant (Pmode, other,
16549 INTVAL (constant)));
16550 }
16551 }
16552
16553 if (changed && ix86_legitimate_address_p (mode, x, false))
16554 return x;
16555
16556 if (GET_CODE (XEXP (x, 0)) == MULT)
16557 {
16558 changed = true;
16559 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
16560 }
16561
16562 if (GET_CODE (XEXP (x, 1)) == MULT)
16563 {
16564 changed = true;
16565 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
16566 }
16567
16568 if (changed
16569 && REG_P (XEXP (x, 1))
16570 && REG_P (XEXP (x, 0)))
16571 return x;
16572
16573 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
16574 {
16575 changed = true;
16576 x = legitimize_pic_address (x, 0);
16577 }
16578
16579 if (changed && ix86_legitimate_address_p (mode, x, false))
16580 return x;
16581
16582 if (REG_P (XEXP (x, 0)))
16583 {
16584 rtx temp = gen_reg_rtx (Pmode);
16585 rtx val = force_operand (XEXP (x, 1), temp);
16586 if (val != temp)
16587 {
16588 val = convert_to_mode (Pmode, val, 1);
16589 emit_move_insn (temp, val);
16590 }
16591
16592 XEXP (x, 1) = temp;
16593 return x;
16594 }
16595
16596 else if (REG_P (XEXP (x, 1)))
16597 {
16598 rtx temp = gen_reg_rtx (Pmode);
16599 rtx val = force_operand (XEXP (x, 0), temp);
16600 if (val != temp)
16601 {
16602 val = convert_to_mode (Pmode, val, 1);
16603 emit_move_insn (temp, val);
16604 }
16605
16606 XEXP (x, 0) = temp;
16607 return x;
16608 }
16609 }
16610
16611 return x;
16612 }
16613 \f
16614 /* Print an integer constant expression in assembler syntax. Addition
16615 and subtraction are the only arithmetic that may appear in these
16616 expressions. FILE is the stdio stream to write to, X is the rtx, and
16617 CODE is the operand print code from the output string. */
16618
16619 static void
16620 output_pic_addr_const (FILE *file, rtx x, int code)
16621 {
16622 char buf[256];
16623
16624 switch (GET_CODE (x))
16625 {
16626 case PC:
16627 gcc_assert (flag_pic);
16628 putc ('.', file);
16629 break;
16630
16631 case SYMBOL_REF:
16632 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
16633 output_addr_const (file, x);
16634 else
16635 {
16636 const char *name = XSTR (x, 0);
16637
16638 /* Mark the decl as referenced so that cgraph will
16639 output the function. */
16640 if (SYMBOL_REF_DECL (x))
16641 mark_decl_referenced (SYMBOL_REF_DECL (x));
16642
16643 #if TARGET_MACHO
16644 if (MACHOPIC_INDIRECT
16645 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
16646 name = machopic_indirection_name (x, /*stub_p=*/true);
16647 #endif
16648 assemble_name (file, name);
16649 }
16650 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
16651 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
16652 fputs ("@PLT", file);
16653 break;
16654
16655 case LABEL_REF:
16656 x = XEXP (x, 0);
16657 /* FALLTHRU */
16658 case CODE_LABEL:
16659 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
16660 assemble_name (asm_out_file, buf);
16661 break;
16662
16663 case CONST_INT:
16664 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16665 break;
16666
16667 case CONST:
16668 /* This used to output parentheses around the expression,
16669 but that does not work on the 386 (either ATT or BSD assembler). */
16670 output_pic_addr_const (file, XEXP (x, 0), code);
16671 break;
16672
16673 case CONST_DOUBLE:
16674 /* We can't handle floating point constants;
16675 TARGET_PRINT_OPERAND must handle them. */
16676 output_operand_lossage ("floating constant misused");
16677 break;
16678
16679 case PLUS:
16680 /* Some assemblers need integer constants to appear first. */
16681 if (CONST_INT_P (XEXP (x, 0)))
16682 {
16683 output_pic_addr_const (file, XEXP (x, 0), code);
16684 putc ('+', file);
16685 output_pic_addr_const (file, XEXP (x, 1), code);
16686 }
16687 else
16688 {
16689 gcc_assert (CONST_INT_P (XEXP (x, 1)));
16690 output_pic_addr_const (file, XEXP (x, 1), code);
16691 putc ('+', file);
16692 output_pic_addr_const (file, XEXP (x, 0), code);
16693 }
16694 break;
16695
16696 case MINUS:
16697 if (!TARGET_MACHO)
16698 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
16699 output_pic_addr_const (file, XEXP (x, 0), code);
16700 putc ('-', file);
16701 output_pic_addr_const (file, XEXP (x, 1), code);
16702 if (!TARGET_MACHO)
16703 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
16704 break;
16705
16706 case UNSPEC:
16707 gcc_assert (XVECLEN (x, 0) == 1);
16708 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
16709 switch (XINT (x, 1))
16710 {
16711 case UNSPEC_GOT:
16712 fputs ("@GOT", file);
16713 break;
16714 case UNSPEC_GOTOFF:
16715 fputs ("@GOTOFF", file);
16716 break;
16717 case UNSPEC_PLTOFF:
16718 fputs ("@PLTOFF", file);
16719 break;
16720 case UNSPEC_PCREL:
16721 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16722 "(%rip)" : "[rip]", file);
16723 break;
16724 case UNSPEC_GOTPCREL:
16725 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16726 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
16727 break;
16728 case UNSPEC_GOTTPOFF:
16729 /* FIXME: This might be @TPOFF in Sun ld too. */
16730 fputs ("@gottpoff", file);
16731 break;
16732 case UNSPEC_TPOFF:
16733 fputs ("@tpoff", file);
16734 break;
16735 case UNSPEC_NTPOFF:
16736 if (TARGET_64BIT)
16737 fputs ("@tpoff", file);
16738 else
16739 fputs ("@ntpoff", file);
16740 break;
16741 case UNSPEC_DTPOFF:
16742 fputs ("@dtpoff", file);
16743 break;
16744 case UNSPEC_GOTNTPOFF:
16745 if (TARGET_64BIT)
16746 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16747 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16748 else
16749 fputs ("@gotntpoff", file);
16750 break;
16751 case UNSPEC_INDNTPOFF:
16752 fputs ("@indntpoff", file);
16753 break;
16754 #if TARGET_MACHO
16755 case UNSPEC_MACHOPIC_OFFSET:
16756 putc ('-', file);
16757 machopic_output_function_base_name (file);
16758 break;
16759 #endif
16760 default:
16761 output_operand_lossage ("invalid UNSPEC as operand");
16762 break;
16763 }
16764 break;
16765
16766 default:
16767 output_operand_lossage ("invalid expression as operand");
16768 }
16769 }
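
/* Example outputs (illustrative): (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF)) prints as "foo@GOTOFF"; wrapped in a PLUS with
   (const_int 4) it prints with the constant first, "4+foo@GOTOFF",
   since some assemblers want the integer term first; and in 64-bit
   code UNSPEC_GOTPCREL prints as "foo@GOTPCREL(%rip)" in AT&T syntax.  */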
16770
16771 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16772 We need to emit DTP-relative relocations. */
16773
16774 static void ATTRIBUTE_UNUSED
16775 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16776 {
16777 fputs (ASM_LONG, file);
16778 output_addr_const (file, x);
16779 fputs ("@dtpoff", file);
16780 switch (size)
16781 {
16782 case 4:
16783 break;
16784 case 8:
16785 fputs (", 0", file);
16786 break;
16787 default:
16788 gcc_unreachable ();
16789 }
16790 }
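
/* E.g. for a 4-byte entry this emits "\t.long\tfoo@dtpoff"; the 8-byte
   case emits the same directive followed by ", 0" for the upper half
   (assuming ASM_LONG is the usual "\t.long\t").  */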
16791
16792 /* Return true if X is a representation of the PIC register. This copes
16793 with calls from ix86_find_base_term, where the register might have
16794 been replaced by a cselib value. */
16795
16796 static bool
16797 ix86_pic_register_p (rtx x)
16798 {
16799 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16800 return (pic_offset_table_rtx
16801 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16802 else if (!REG_P (x))
16803 return false;
16804 else if (pic_offset_table_rtx)
16805 {
16806 if (REGNO (x) == REGNO (pic_offset_table_rtx))
16807 return true;
16808 if (HARD_REGISTER_P (x)
16809 && !HARD_REGISTER_P (pic_offset_table_rtx)
16810 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
16811 return true;
16812 return false;
16813 }
16814 else
16815 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16816 }
16817
16818 /* Helper function for ix86_delegitimize_address.
16819 Attempt to delegitimize TLS local-exec accesses. */
16820
16821 static rtx
16822 ix86_delegitimize_tls_address (rtx orig_x)
16823 {
16824 rtx x = orig_x, unspec;
16825 struct ix86_address addr;
16826
16827 if (!TARGET_TLS_DIRECT_SEG_REFS)
16828 return orig_x;
16829 if (MEM_P (x))
16830 x = XEXP (x, 0);
16831 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
16832 return orig_x;
16833 if (ix86_decompose_address (x, &addr) == 0
16834 || addr.seg != DEFAULT_TLS_SEG_REG
16835 || addr.disp == NULL_RTX
16836 || GET_CODE (addr.disp) != CONST)
16837 return orig_x;
16838 unspec = XEXP (addr.disp, 0);
16839 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16840 unspec = XEXP (unspec, 0);
16841 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16842 return orig_x;
16843 x = XVECEXP (unspec, 0, 0);
16844 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16845 if (unspec != XEXP (addr.disp, 0))
16846 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16847 if (addr.index)
16848 {
16849 rtx idx = addr.index;
16850 if (addr.scale != 1)
16851 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16852 x = gen_rtx_PLUS (Pmode, idx, x);
16853 }
16854 if (addr.base)
16855 x = gen_rtx_PLUS (Pmode, addr.base, x);
16856 if (MEM_P (orig_x))
16857 x = replace_equiv_address_nv (orig_x, x);
16858 return x;
16859 }
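
/* Illustrative example: an address whose displacement is
     (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF))
   i.e. a TLS local-exec reference relative to the thread pointer, is
   rewritten back into a reference to "x" plus the original base/index,
   which is what debug output wants to see.  */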
16860
16861 /* In the name of slightly smaller debug output, and to cater to
16862 general assembler lossage, recognize PIC+GOTOFF and turn it back
16863 into a direct symbol reference.
16864
16865 On Darwin, this is necessary to avoid a crash, because Darwin
16866 has a different PIC label for each routine but the DWARF debugging
16867 information is not associated with any particular routine, so it's
16868 necessary to remove references to the PIC label from RTL stored by
16869 the DWARF output code.
16870
16871 This helper is used in the normal ix86_delegitimize_address
16872 entrypoint (e.g. used in the target delegitimization hook) and
16873 in ix86_find_base_term. As compile time memory optimization, we
16874 avoid allocating rtxes that will not change anything on the outcome
16875 of the callers (find_base_value and find_base_term). */
16876
16877 static inline rtx
16878 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
16879 {
16880 rtx orig_x = delegitimize_mem_from_attrs (x);
16881 /* addend is NULL or some rtx if x is something+GOTOFF where
16882 something doesn't include the PIC register. */
16883 rtx addend = NULL_RTX;
16884 /* reg_addend is NULL or a multiple of some register. */
16885 rtx reg_addend = NULL_RTX;
16886 /* const_addend is NULL or a const_int. */
16887 rtx const_addend = NULL_RTX;
16888 /* This is the result, or NULL. */
16889 rtx result = NULL_RTX;
16890
16891 x = orig_x;
16892
16893 if (MEM_P (x))
16894 x = XEXP (x, 0);
16895
16896 if (TARGET_64BIT)
16897 {
16898 if (GET_CODE (x) == CONST
16899 && GET_CODE (XEXP (x, 0)) == PLUS
16900 && GET_MODE (XEXP (x, 0)) == Pmode
16901 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16902 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16903 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16904 {
16905 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
16906 base. A CONST can't be arg_pointer_rtx based. */
16907 if (base_term_p && MEM_P (orig_x))
16908 return orig_x;
16909 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16910 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16911 if (MEM_P (orig_x))
16912 x = replace_equiv_address_nv (orig_x, x);
16913 return x;
16914 }
16915
16916 if (GET_CODE (x) == CONST
16917 && GET_CODE (XEXP (x, 0)) == UNSPEC
16918 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16919 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16920 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16921 {
16922 x = XVECEXP (XEXP (x, 0), 0, 0);
16923 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16924 {
16925 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
16926 if (x == NULL_RTX)
16927 return orig_x;
16928 }
16929 return x;
16930 }
16931
16932 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16933 return ix86_delegitimize_tls_address (orig_x);
16934
16935 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16936 and -mcmodel=medium -fpic. */
16937 }
16938
16939 if (GET_CODE (x) != PLUS
16940 || GET_CODE (XEXP (x, 1)) != CONST)
16941 return ix86_delegitimize_tls_address (orig_x);
16942
16943 if (ix86_pic_register_p (XEXP (x, 0)))
16944 /* %ebx + GOT/GOTOFF */
16945 ;
16946 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16947 {
16948 /* %ebx + %reg * scale + GOT/GOTOFF */
16949 reg_addend = XEXP (x, 0);
16950 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16951 reg_addend = XEXP (reg_addend, 1);
16952 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16953 reg_addend = XEXP (reg_addend, 0);
16954 else
16955 {
16956 reg_addend = NULL_RTX;
16957 addend = XEXP (x, 0);
16958 }
16959 }
16960 else
16961 addend = XEXP (x, 0);
16962
16963 x = XEXP (XEXP (x, 1), 0);
16964 if (GET_CODE (x) == PLUS
16965 && CONST_INT_P (XEXP (x, 1)))
16966 {
16967 const_addend = XEXP (x, 1);
16968 x = XEXP (x, 0);
16969 }
16970
16971 if (GET_CODE (x) == UNSPEC
16972 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16973 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16974 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16975 && !MEM_P (orig_x) && !addend)))
16976 result = XVECEXP (x, 0, 0);
16977
16978 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16979 && !MEM_P (orig_x))
16980 result = XVECEXP (x, 0, 0);
16981
16982 if (! result)
16983 return ix86_delegitimize_tls_address (orig_x);
16984
16985 /* For (PLUS something CONST_INT) both find_base_{value,term} just
16986 recurse on the first operand. */
16987 if (const_addend && !base_term_p)
16988 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16989 if (reg_addend)
16990 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16991 if (addend)
16992 {
16993 /* If the rest of original X doesn't involve the PIC register, add
16994 addend and subtract pic_offset_table_rtx. This can happen e.g.
16995 for code like:
16996 leal (%ebx, %ecx, 4), %ecx
16997 ...
16998 movl foo@GOTOFF(%ecx), %edx
16999 in which case we return (%ecx - %ebx) + foo
17000 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
17001 and reload has completed. Don't do the latter for debug,
17002 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
17003 if (pic_offset_table_rtx
17004 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
17005 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
17006 pic_offset_table_rtx),
17007 result);
17008 else if (base_term_p
17009 && pic_offset_table_rtx
17010 && !TARGET_MACHO
17011 && !TARGET_VXWORKS_RTP)
17012 {
17013 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
17014 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
17015 result = gen_rtx_PLUS (Pmode, tmp, result);
17016 }
17017 else
17018 return orig_x;
17019 }
17020 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
17021 {
17022 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
17023 if (result == NULL_RTX)
17024 return orig_x;
17025 }
17026 return result;
17027 }
17028
17029 /* The normal instantiation of the above template. */
17030
17031 static rtx
17032 ix86_delegitimize_address (rtx x)
17033 {
17034 return ix86_delegitimize_address_1 (x, false);
17035 }
17036
17037 /* If X is a machine specific address (i.e. a symbol or label being
17038 referenced as a displacement from the GOT implemented using an
17039 UNSPEC), then return the base term. Otherwise return X. */
17040
17041 rtx
17042 ix86_find_base_term (rtx x)
17043 {
17044 rtx term;
17045
17046 if (TARGET_64BIT)
17047 {
17048 if (GET_CODE (x) != CONST)
17049 return x;
17050 term = XEXP (x, 0);
17051 if (GET_CODE (term) == PLUS
17052 && CONST_INT_P (XEXP (term, 1)))
17053 term = XEXP (term, 0);
17054 if (GET_CODE (term) != UNSPEC
17055 || (XINT (term, 1) != UNSPEC_GOTPCREL
17056 && XINT (term, 1) != UNSPEC_PCREL))
17057 return x;
17058
17059 return XVECEXP (term, 0, 0);
17060 }
17061
17062 return ix86_delegitimize_address_1 (x, true);
17063 }
17064
17065 /* Return true if X shouldn't be emitted into the debug info.
17066 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
17067 symbol easily into the .debug_info section, so we do not
17068 delegitimize it, but instead assemble it as @gotoff.
17069 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
17070 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
17071
17072 static bool
17073 ix86_const_not_ok_for_debug_p (rtx x)
17074 {
17075 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
17076 return true;
17077
17078 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
17079 return true;
17080
17081 return false;
17082 }
17083 \f
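/* Output to FILE the condition-code suffix ("e", "ne", "b", "ge", ...)
   for comparison CODE in CC mode MODE, optionally REVERSEd; FP selects
   the spellings used for fcmov/SSE lossage ("nbe", "nb", "u", ...).
   The suffix is the string consumed by the 'C', 'c', 'F' and 'f'
   operand codes below, typically after set/j/cmov mnemonics.  */
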
17084 static void
17085 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
17086 bool fp, FILE *file)
17087 {
17088 const char *suffix;
17089
17090 if (mode == CCFPmode)
17091 {
17092 code = ix86_fp_compare_code_to_integer (code);
17093 mode = CCmode;
17094 }
17095 if (reverse)
17096 code = reverse_condition (code);
17097
17098 switch (code)
17099 {
17100 case EQ:
17101 gcc_assert (mode != CCGZmode);
17102 switch (mode)
17103 {
17104 case E_CCAmode:
17105 suffix = "a";
17106 break;
17107 case E_CCCmode:
17108 suffix = "c";
17109 break;
17110 case E_CCOmode:
17111 suffix = "o";
17112 break;
17113 case E_CCPmode:
17114 suffix = "p";
17115 break;
17116 case E_CCSmode:
17117 suffix = "s";
17118 break;
17119 default:
17120 suffix = "e";
17121 break;
17122 }
17123 break;
17124 case NE:
17125 gcc_assert (mode != CCGZmode);
17126 switch (mode)
17127 {
17128 case E_CCAmode:
17129 suffix = "na";
17130 break;
17131 case E_CCCmode:
17132 suffix = "nc";
17133 break;
17134 case E_CCOmode:
17135 suffix = "no";
17136 break;
17137 case E_CCPmode:
17138 suffix = "np";
17139 break;
17140 case E_CCSmode:
17141 suffix = "ns";
17142 break;
17143 default:
17144 suffix = "ne";
17145 break;
17146 }
17147 break;
17148 case GT:
17149 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
17150 suffix = "g";
17151 break;
17152 case GTU:
17153 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
17154 Those same assemblers have the same but opposite lossage on cmov. */
17155 if (mode == CCmode)
17156 suffix = fp ? "nbe" : "a";
17157 else
17158 gcc_unreachable ();
17159 break;
17160 case LT:
17161 switch (mode)
17162 {
17163 case E_CCNOmode:
17164 case E_CCGOCmode:
17165 suffix = "s";
17166 break;
17167
17168 case E_CCmode:
17169 case E_CCGCmode:
17170 case E_CCGZmode:
17171 suffix = "l";
17172 break;
17173
17174 default:
17175 gcc_unreachable ();
17176 }
17177 break;
17178 case LTU:
17179 if (mode == CCmode || mode == CCGZmode)
17180 suffix = "b";
17181 else if (mode == CCCmode)
17182 suffix = fp ? "b" : "c";
17183 else
17184 gcc_unreachable ();
17185 break;
17186 case GE:
17187 switch (mode)
17188 {
17189 case E_CCNOmode:
17190 case E_CCGOCmode:
17191 suffix = "ns";
17192 break;
17193
17194 case E_CCmode:
17195 case E_CCGCmode:
17196 case E_CCGZmode:
17197 suffix = "ge";
17198 break;
17199
17200 default:
17201 gcc_unreachable ();
17202 }
17203 break;
17204 case GEU:
17205 if (mode == CCmode || mode == CCGZmode)
17206 suffix = "nb";
17207 else if (mode == CCCmode)
17208 suffix = fp ? "nb" : "nc";
17209 else
17210 gcc_unreachable ();
17211 break;
17212 case LE:
17213 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
17214 suffix = "le";
17215 break;
17216 case LEU:
17217 if (mode == CCmode)
17218 suffix = "be";
17219 else
17220 gcc_unreachable ();
17221 break;
17222 case UNORDERED:
17223 suffix = fp ? "u" : "p";
17224 break;
17225 case ORDERED:
17226 suffix = fp ? "nu" : "np";
17227 break;
17228 default:
17229 gcc_unreachable ();
17230 }
17231 fputs (suffix, file);
17232 }
17233
17234 /* Print the name of register X to FILE based on its machine mode and number.
17235 If CODE is 'w', pretend the mode is HImode.
17236 If CODE is 'b', pretend the mode is QImode.
17237 If CODE is 'k', pretend the mode is SImode.
17238 If CODE is 'q', pretend the mode is DImode.
17239 If CODE is 'x', pretend the mode is V4SFmode.
17240 If CODE is 't', pretend the mode is V8SFmode.
17241 If CODE is 'g', pretend the mode is V16SFmode.
17242 If CODE is 'h', pretend the reg is the 'high' byte register.
17243 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
17244 If CODE is 'd', duplicate the operand for an AVX instruction.
17245 */
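
/* For example, with register AX: code 'q' prints "rax" (in 64-bit code),
   'k' prints "eax", 'w' prints "ax", 'b' prints "al" and 'h' prints "ah",
   each with a leading '%' in AT&T syntax; SSE registers under codes
   't'/'g' get their "xmm" name rewritten to "ymm"/"zmm" by the
   case 32/64 handling below.  */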
17246
17247 void
17248 print_reg (rtx x, int code, FILE *file)
17249 {
17250 const char *reg;
17251 int msize;
17252 unsigned int regno;
17253 bool duplicated;
17254
17255 if (ASSEMBLER_DIALECT == ASM_ATT)
17256 putc ('%', file);
17257
17258 if (x == pc_rtx)
17259 {
17260 gcc_assert (TARGET_64BIT);
17261 fputs ("rip", file);
17262 return;
17263 }
17264
17265 if (code == 'y' && STACK_TOP_P (x))
17266 {
17267 fputs ("st(0)", file);
17268 return;
17269 }
17270
17271 if (code == 'w')
17272 msize = 2;
17273 else if (code == 'b')
17274 msize = 1;
17275 else if (code == 'k')
17276 msize = 4;
17277 else if (code == 'q')
17278 msize = 8;
17279 else if (code == 'h')
17280 msize = 0;
17281 else if (code == 'x')
17282 msize = 16;
17283 else if (code == 't')
17284 msize = 32;
17285 else if (code == 'g')
17286 msize = 64;
17287 else
17288 msize = GET_MODE_SIZE (GET_MODE (x));
17289
17290 regno = REGNO (x);
17291
17292 if (regno == ARG_POINTER_REGNUM
17293 || regno == FRAME_POINTER_REGNUM
17294 || regno == FPSR_REG
17295 || regno == FPCR_REG)
17296 {
17297 output_operand_lossage
17298 ("invalid use of register '%s'", reg_names[regno]);
17299 return;
17300 }
17301 else if (regno == FLAGS_REG)
17302 {
17303 output_operand_lossage ("invalid use of asm flag output");
17304 return;
17305 }
17306
17307 duplicated = code == 'd' && TARGET_AVX;
17308
17309 switch (msize)
17310 {
17311 case 16:
17312 case 12:
17313 case 8:
17314 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
17315 warning (0, "unsupported size for integer register");
17316 /* FALLTHRU */
17317 case 4:
17318 if (LEGACY_INT_REGNO_P (regno))
17319 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
17320 /* FALLTHRU */
17321 case 2:
17322 normal:
17323 reg = hi_reg_name[regno];
17324 break;
17325 case 1:
17326 if (regno >= ARRAY_SIZE (qi_reg_name))
17327 goto normal;
17328 if (!ANY_QI_REGNO_P (regno))
17329 error ("unsupported size for integer register");
17330 reg = qi_reg_name[regno];
17331 break;
17332 case 0:
17333 if (regno >= ARRAY_SIZE (qi_high_reg_name))
17334 goto normal;
17335 reg = qi_high_reg_name[regno];
17336 break;
17337 case 32:
17338 case 64:
17339 if (SSE_REGNO_P (regno))
17340 {
17341 gcc_assert (!duplicated);
17342 putc (msize == 32 ? 'y' : 'z', file);
17343 reg = hi_reg_name[regno] + 1;
17344 break;
17345 }
17346 goto normal;
17347 default:
17348 gcc_unreachable ();
17349 }
17350
17351 fputs (reg, file);
17352
17353 /* Irritatingly, AMD extended registers use a
17354 different naming convention: "r%d[bwd]". */
17355 if (REX_INT_REGNO_P (regno))
17356 {
17357 gcc_assert (TARGET_64BIT);
17358 switch (msize)
17359 {
17360 case 0:
17361 error ("extended registers have no high halves");
17362 break;
17363 case 1:
17364 putc ('b', file);
17365 break;
17366 case 2:
17367 putc ('w', file);
17368 break;
17369 case 4:
17370 putc ('d', file);
17371 break;
17372 case 8:
17373 /* no suffix */
17374 break;
17375 default:
17376 error ("unsupported operand size for extended register");
17377 break;
17378 }
17379 return;
17380 }
17381
17382 if (duplicated)
17383 {
17384 if (ASSEMBLER_DIALECT == ASM_ATT)
17385 fprintf (file, ", %%%s", reg);
17386 else
17387 fprintf (file, ", %s", reg);
17388 }
17389 }
17390
17391 /* Meaning of CODE:
17392 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
17393 C -- print opcode suffix for set/cmov insn.
17394 c -- like C, but print reversed condition
17395 F,f -- likewise, but for floating-point.
17396 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
17397 otherwise nothing
17398 R -- print embedded rounding and sae.
17399 r -- print only sae.
17400 z -- print the opcode suffix for the size of the current operand.
17401 Z -- likewise, with special suffixes for x87 instructions.
17402 * -- print a star (in certain assembler syntax)
17403 A -- print an absolute memory reference.
17404 E -- print address with DImode register names if TARGET_64BIT.
17405 w -- print the operand as if it's a "word" (HImode) even if it isn't.
17406 s -- print a shift double count, followed by the assembler's argument
17407 delimiter.
17408 b -- print the QImode name of the register for the indicated operand.
17409 %b0 would print %al if operands[0] is reg 0.
17410 w -- likewise, print the HImode name of the register.
17411 k -- likewise, print the SImode name of the register.
17412 q -- likewise, print the DImode name of the register.
17413 x -- likewise, print the V4SFmode name of the register.
17414 t -- likewise, print the V8SFmode name of the register.
17415 g -- likewise, print the V16SFmode name of the register.
17416 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
17417 y -- print "st(0)" instead of "st" as a register.
17418 d -- print duplicated register operand for an AVX instruction.
17419 D -- print condition for SSE cmp instruction.
17420 P -- if PIC, print an @PLT suffix.
17421 p -- print raw symbol name.
17422 X -- don't print any sort of PIC '@' suffix for a symbol.
17423 & -- print some in-use local-dynamic symbol name.
17424 H -- print a memory address offset by 8; used for sse high-parts
17425 Y -- print condition for XOP pcom* instruction.
17426 + -- print a branch hint as 'cs' or 'ds' prefix
17427 ; -- print a semicolon (after prefixes due to bug in older gas).
17428 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
17429 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
17430 ! -- print MPX prefix for jxx/call/ret instructions if required.
17431 */
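
/* A few concrete uses (illustrative): in an insn template, "%z0" emits the
   AT&T size suffix for operand 0 (e.g. "l" for SImode), "%k1" prints the
   32-bit name of register operand 1, and "%P0" prints a symbol suitable
   for a direct call, adding "@PLT" when needed.  */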
17432
17433 void
17434 ix86_print_operand (FILE *file, rtx x, int code)
17435 {
17436 if (code)
17437 {
17438 switch (code)
17439 {
17440 case 'A':
17441 switch (ASSEMBLER_DIALECT)
17442 {
17443 case ASM_ATT:
17444 putc ('*', file);
17445 break;
17446
17447 case ASM_INTEL:
17448 /* Intel syntax. For absolute addresses, registers should not
17449 be surrounded by brackets. */
17450 if (!REG_P (x))
17451 {
17452 putc ('[', file);
17453 ix86_print_operand (file, x, 0);
17454 putc (']', file);
17455 return;
17456 }
17457 break;
17458
17459 default:
17460 gcc_unreachable ();
17461 }
17462
17463 ix86_print_operand (file, x, 0);
17464 return;
17465
17466 case 'E':
17467 /* Wrap address in an UNSPEC to declare special handling. */
17468 if (TARGET_64BIT)
17469 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
17470
17471 output_address (VOIDmode, x);
17472 return;
17473
17474 case 'L':
17475 if (ASSEMBLER_DIALECT == ASM_ATT)
17476 putc ('l', file);
17477 return;
17478
17479 case 'W':
17480 if (ASSEMBLER_DIALECT == ASM_ATT)
17481 putc ('w', file);
17482 return;
17483
17484 case 'B':
17485 if (ASSEMBLER_DIALECT == ASM_ATT)
17486 putc ('b', file);
17487 return;
17488
17489 case 'Q':
17490 if (ASSEMBLER_DIALECT == ASM_ATT)
17491 putc ('l', file);
17492 return;
17493
17494 case 'S':
17495 if (ASSEMBLER_DIALECT == ASM_ATT)
17496 putc ('s', file);
17497 return;
17498
17499 case 'T':
17500 if (ASSEMBLER_DIALECT == ASM_ATT)
17501 putc ('t', file);
17502 return;
17503
17504 case 'O':
17505 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17506 if (ASSEMBLER_DIALECT != ASM_ATT)
17507 return;
17508
17509 switch (GET_MODE_SIZE (GET_MODE (x)))
17510 {
17511 case 2:
17512 putc ('w', file);
17513 break;
17514
17515 case 4:
17516 putc ('l', file);
17517 break;
17518
17519 case 8:
17520 putc ('q', file);
17521 break;
17522
17523 default:
17524 output_operand_lossage ("invalid operand size for operand "
17525 "code 'O'");
17526 return;
17527 }
17528
17529 putc ('.', file);
17530 #endif
17531 return;
17532
17533 case 'z':
17534 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17535 {
17536 /* Opcodes don't get size suffixes when using Intel syntax. */
17537 if (ASSEMBLER_DIALECT == ASM_INTEL)
17538 return;
17539
17540 switch (GET_MODE_SIZE (GET_MODE (x)))
17541 {
17542 case 1:
17543 putc ('b', file);
17544 return;
17545
17546 case 2:
17547 putc ('w', file);
17548 return;
17549
17550 case 4:
17551 putc ('l', file);
17552 return;
17553
17554 case 8:
17555 putc ('q', file);
17556 return;
17557
17558 default:
17559 output_operand_lossage ("invalid operand size for operand "
17560 "code 'z'");
17561 return;
17562 }
17563 }
17564
17565 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17566 warning (0, "non-integer operand used with operand code 'z'");
17567 /* FALLTHRU */
17568
17569 case 'Z':
17570 /* 387 opcodes don't get size suffixes when using Intel syntax. */
17571 if (ASSEMBLER_DIALECT == ASM_INTEL)
17572 return;
17573
17574 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17575 {
17576 switch (GET_MODE_SIZE (GET_MODE (x)))
17577 {
17578 case 2:
17579 #ifdef HAVE_AS_IX86_FILDS
17580 putc ('s', file);
17581 #endif
17582 return;
17583
17584 case 4:
17585 putc ('l', file);
17586 return;
17587
17588 case 8:
17589 #ifdef HAVE_AS_IX86_FILDQ
17590 putc ('q', file);
17591 #else
17592 fputs ("ll", file);
17593 #endif
17594 return;
17595
17596 default:
17597 break;
17598 }
17599 }
17600 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17601 {
17602 /* 387 opcodes don't get size suffixes
17603 if the operands are registers. */
17604 if (STACK_REG_P (x))
17605 return;
17606
17607 switch (GET_MODE_SIZE (GET_MODE (x)))
17608 {
17609 case 4:
17610 putc ('s', file);
17611 return;
17612
17613 case 8:
17614 putc ('l', file);
17615 return;
17616
17617 case 12:
17618 case 16:
17619 putc ('t', file);
17620 return;
17621
17622 default:
17623 break;
17624 }
17625 }
17626 else
17627 {
17628 output_operand_lossage ("invalid operand type used with "
17629 "operand code 'Z'");
17630 return;
17631 }
17632
17633 output_operand_lossage ("invalid operand size for operand code 'Z'");
17634 return;
17635
17636 case 'd':
17637 case 'b':
17638 case 'w':
17639 case 'k':
17640 case 'q':
17641 case 'h':
17642 case 't':
17643 case 'g':
17644 case 'y':
17645 case 'x':
17646 case 'X':
17647 case 'P':
17648 case 'p':
17649 break;
17650
17651 case 's':
17652 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
17653 {
17654 ix86_print_operand (file, x, 0);
17655 fputs (", ", file);
17656 }
17657 return;
17658
17659 case 'Y':
17660 switch (GET_CODE (x))
17661 {
17662 case NE:
17663 fputs ("neq", file);
17664 break;
17665 case EQ:
17666 fputs ("eq", file);
17667 break;
17668 case GE:
17669 case GEU:
17670 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
17671 break;
17672 case GT:
17673 case GTU:
17674 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
17675 break;
17676 case LE:
17677 case LEU:
17678 fputs ("le", file);
17679 break;
17680 case LT:
17681 case LTU:
17682 fputs ("lt", file);
17683 break;
17684 case UNORDERED:
17685 fputs ("unord", file);
17686 break;
17687 case ORDERED:
17688 fputs ("ord", file);
17689 break;
17690 case UNEQ:
17691 fputs ("ueq", file);
17692 break;
17693 case UNGE:
17694 fputs ("nlt", file);
17695 break;
17696 case UNGT:
17697 fputs ("nle", file);
17698 break;
17699 case UNLE:
17700 fputs ("ule", file);
17701 break;
17702 case UNLT:
17703 fputs ("ult", file);
17704 break;
17705 case LTGT:
17706 fputs ("une", file);
17707 break;
17708 default:
17709 output_operand_lossage ("operand is not a condition code, "
17710 "invalid operand code 'Y'");
17711 return;
17712 }
17713 return;
17714
17715 case 'D':
17716 /* Little bit of braindamage here. The SSE compare instructions
17717 use completely different names for the comparisons than the
17718 fp conditional moves do. */
17719 switch (GET_CODE (x))
17720 {
17721 case UNEQ:
17722 if (TARGET_AVX)
17723 {
17724 fputs ("eq_us", file);
17725 break;
17726 }
17727 /* FALLTHRU */
17728 case EQ:
17729 fputs ("eq", file);
17730 break;
17731 case UNLT:
17732 if (TARGET_AVX)
17733 {
17734 fputs ("nge", file);
17735 break;
17736 }
17737 /* FALLTHRU */
17738 case LT:
17739 fputs ("lt", file);
17740 break;
17741 case UNLE:
17742 if (TARGET_AVX)
17743 {
17744 fputs ("ngt", file);
17745 break;
17746 }
17747 /* FALLTHRU */
17748 case LE:
17749 fputs ("le", file);
17750 break;
17751 case UNORDERED:
17752 fputs ("unord", file);
17753 break;
17754 case LTGT:
17755 if (TARGET_AVX)
17756 {
17757 fputs ("neq_oq", file);
17758 break;
17759 }
17760 /* FALLTHRU */
17761 case NE:
17762 fputs ("neq", file);
17763 break;
17764 case GE:
17765 if (TARGET_AVX)
17766 {
17767 fputs ("ge", file);
17768 break;
17769 }
17770 /* FALLTHRU */
17771 case UNGE:
17772 fputs ("nlt", file);
17773 break;
17774 case GT:
17775 if (TARGET_AVX)
17776 {
17777 fputs ("gt", file);
17778 break;
17779 }
17780 /* FALLTHRU */
17781 case UNGT:
17782 fputs ("nle", file);
17783 break;
17784 case ORDERED:
17785 fputs ("ord", file);
17786 break;
17787 default:
17788 output_operand_lossage ("operand is not a condition code, "
17789 "invalid operand code 'D'");
17790 return;
17791 }
17792 return;
17793
17794 case 'F':
17795 case 'f':
17796 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17797 if (ASSEMBLER_DIALECT == ASM_ATT)
17798 putc ('.', file);
17799 gcc_fallthrough ();
17800 #endif
17801
17802 case 'C':
17803 case 'c':
17804 if (!COMPARISON_P (x))
17805 {
17806 output_operand_lossage ("operand is not a condition code, "
17807 "invalid operand code '%c'", code);
17808 return;
17809 }
17810 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17811 code == 'c' || code == 'f',
17812 code == 'F' || code == 'f',
17813 file);
17814 return;
17815
17816 case 'H':
17817 if (!offsettable_memref_p (x))
17818 {
17819 output_operand_lossage ("operand is not an offsettable memory "
17820 "reference, invalid operand code 'H'");
17821 return;
17822 }
17823 /* It doesn't actually matter what mode we use here, as we're
17824 only going to use this for printing. */
17825 x = adjust_address_nv (x, DImode, 8);
17826 /* Output 'qword ptr' for intel assembler dialect. */
17827 if (ASSEMBLER_DIALECT == ASM_INTEL)
17828 code = 'q';
17829 break;
17830
17831 case 'K':
17832 if (!CONST_INT_P (x))
17833 {
17834 output_operand_lossage ("operand is not an integer, invalid "
17835 "operand code 'K'");
17836 return;
17837 }
17838
17839 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17840 #ifdef HAVE_AS_IX86_HLE
17841 fputs ("xacquire ", file);
17842 #else
17843 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17844 #endif
17845 else if (INTVAL (x) & IX86_HLE_RELEASE)
17846 #ifdef HAVE_AS_IX86_HLE
17847 fputs ("xrelease ", file);
17848 #else
17849 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17850 #endif
17851 /* We do not want to print value of the operand. */
17852 return;
17853
17854 case 'N':
17855 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17856 fputs ("{z}", file);
17857 return;
17858
17859 case 'r':
17860 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
17861 {
17862 output_operand_lossage ("operand is not a specific integer, "
17863 "invalid operand code 'r'");
17864 return;
17865 }
17866
17867 if (ASSEMBLER_DIALECT == ASM_INTEL)
17868 fputs (", ", file);
17869
17870 fputs ("{sae}", file);
17871
17872 if (ASSEMBLER_DIALECT == ASM_ATT)
17873 fputs (", ", file);
17874
17875 return;
17876
17877 case 'R':
17878 if (!CONST_INT_P (x))
17879 {
17880 output_operand_lossage ("operand is not an integer, invalid "
17881 "operand code 'R'");
17882 return;
17883 }
17884
17885 if (ASSEMBLER_DIALECT == ASM_INTEL)
17886 fputs (", ", file);
17887
17888 switch (INTVAL (x))
17889 {
17890 case ROUND_NEAREST_INT | ROUND_SAE:
17891 fputs ("{rn-sae}", file);
17892 break;
17893 case ROUND_NEG_INF | ROUND_SAE:
17894 fputs ("{rd-sae}", file);
17895 break;
17896 case ROUND_POS_INF | ROUND_SAE:
17897 fputs ("{ru-sae}", file);
17898 break;
17899 case ROUND_ZERO | ROUND_SAE:
17900 fputs ("{rz-sae}", file);
17901 break;
17902 default:
17903 output_operand_lossage ("operand is not a specific integer, "
17904 "invalid operand code 'R'");
17905 }
17906
17907 if (ASSEMBLER_DIALECT == ASM_ATT)
17908 fputs (", ", file);
17909
17910 return;
17911
17912 case '*':
17913 if (ASSEMBLER_DIALECT == ASM_ATT)
17914 putc ('*', file);
17915 return;
17916
17917 case '&':
17918 {
17919 const char *name = get_some_local_dynamic_name ();
17920 if (name == NULL)
17921 output_operand_lossage ("'%%&' used without any "
17922 "local dynamic TLS references");
17923 else
17924 assemble_name (file, name);
17925 return;
17926 }
17927
17928 case '+':
17929 {
17930 rtx x;
17931
17932 if (!optimize
17933 || optimize_function_for_size_p (cfun)
17934 || !TARGET_BRANCH_PREDICTION_HINTS)
17935 return;
17936
17937 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17938 if (x)
17939 {
17940 int pred_val = profile_probability::from_reg_br_prob_note
17941 (XINT (x, 0)).to_reg_br_prob_base ();
17942
17943 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17944 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17945 {
17946 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17947 bool cputaken
17948 = final_forward_branch_p (current_output_insn) == 0;
17949
17950 /* Emit hints only in cases where the default branch
17951 prediction heuristics would fail. */
17952 if (taken != cputaken)
17953 {
17954 /* We use 3e (DS) prefix for taken branches and
17955 2e (CS) prefix for not taken branches. */
17956 if (taken)
17957 fputs ("ds ; ", file);
17958 else
17959 fputs ("cs ; ", file);
17960 }
17961 }
17962 }
17963 return;
17964 }
17965
17966 case ';':
17967 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17968 putc (';', file);
17969 #endif
17970 return;
17971
17972 case '~':
17973 putc (TARGET_AVX2 ? 'i' : 'f', file);
17974 return;
17975
17976 case '^':
17977 if (TARGET_64BIT && Pmode != word_mode)
17978 fputs ("addr32 ", file);
17979 return;
17980
17981 case '!':
17982 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17983 fputs ("bnd ", file);
17984 if (ix86_notrack_prefixed_insn_p (current_output_insn))
17985 fputs ("notrack ", file);
17986 return;
17987
17988 default:
17989 output_operand_lossage ("invalid operand code '%c'", code);
17990 }
17991 }
17992
17993 if (REG_P (x))
17994 print_reg (x, code, file);
17995
17996 else if (MEM_P (x))
17997 {
17998 rtx addr = XEXP (x, 0);
17999
18000 /* No `byte ptr' prefix for call instructions ... */
18001 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
18002 {
18003 machine_mode mode = GET_MODE (x);
18004 const char *size;
18005
18006 /* Check for explicit size override codes. */
18007 if (code == 'b')
18008 size = "BYTE";
18009 else if (code == 'w')
18010 size = "WORD";
18011 else if (code == 'k')
18012 size = "DWORD";
18013 else if (code == 'q')
18014 size = "QWORD";
18015 else if (code == 'x')
18016 size = "XMMWORD";
18017 else if (code == 't')
18018 size = "YMMWORD";
18019 else if (code == 'g')
18020 size = "ZMMWORD";
18021 else if (mode == BLKmode)
18022 /* ... or BLKmode operands, when not overridden. */
18023 size = NULL;
18024 else
18025 switch (GET_MODE_SIZE (mode))
18026 {
18027 case 1: size = "BYTE"; break;
18028 case 2: size = "WORD"; break;
18029 case 4: size = "DWORD"; break;
18030 case 8: size = "QWORD"; break;
18031 case 12: size = "TBYTE"; break;
18032 case 16:
18033 if (mode == XFmode)
18034 size = "TBYTE";
18035 else
18036 size = "XMMWORD";
18037 break;
18038 case 32: size = "YMMWORD"; break;
18039 case 64: size = "ZMMWORD"; break;
18040 default:
18041 gcc_unreachable ();
18042 }
18043 if (size)
18044 {
18045 fputs (size, file);
18046 fputs (" PTR ", file);
18047 }
18048 }
18049
18050 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
18051 output_operand_lossage ("invalid constraints for operand");
18052 else
18053 ix86_print_operand_address_as
18054 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
18055 }
18056
18057 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
18058 {
18059 long l;
18060
18061 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18062
18063 if (ASSEMBLER_DIALECT == ASM_ATT)
18064 putc ('$', file);
18065 /* Sign extend 32bit SFmode immediate to 8 bytes. */
18066 if (code == 'q')
18067 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
18068 (unsigned long long) (int) l);
18069 else
18070 fprintf (file, "0x%08x", (unsigned int) l);
18071 }
18072
18073 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
18074 {
18075 long l[2];
18076
18077 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18078
18079 if (ASSEMBLER_DIALECT == ASM_ATT)
18080 putc ('$', file);
18081 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
18082 }
18083
18084 /* These float cases don't actually occur as immediate operands. */
18085 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
18086 {
18087 char dstr[30];
18088
18089 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
18090 fputs (dstr, file);
18091 }
18092
18093 else
18094 {
18095 /* We have patterns that allow zero sets of memory, for instance.
18096 In 64-bit mode, we should probably support all 8-byte vectors,
18097 since we can in fact encode that into an immediate. */
18098 if (GET_CODE (x) == CONST_VECTOR)
18099 {
18100 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
18101 x = const0_rtx;
18102 }
18103
18104 if (code != 'P' && code != 'p')
18105 {
18106 if (CONST_INT_P (x))
18107 {
18108 if (ASSEMBLER_DIALECT == ASM_ATT)
18109 putc ('$', file);
18110 }
18111 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
18112 || GET_CODE (x) == LABEL_REF)
18113 {
18114 if (ASSEMBLER_DIALECT == ASM_ATT)
18115 putc ('$', file);
18116 else
18117 fputs ("OFFSET FLAT:", file);
18118 }
18119 }
18120 if (CONST_INT_P (x))
18121 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18122 else if (flag_pic || MACHOPIC_INDIRECT)
18123 output_pic_addr_const (file, x, code);
18124 else
18125 output_addr_const (file, x);
18126 }
18127 }
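/* Illustrative example of the dialect handling above: an 8-byte (DFmode)
   memory operand prints as "QWORD PTR [esp+8]" in the Intel dialect but
   simply as "8(%esp)" in the AT&T dialect, and an AT&T immediate gets a
   leading '$' while an Intel symbolic immediate gets "OFFSET FLAT:"
   (operands are made up for the example).  */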
18128
18129 static bool
18130 ix86_print_operand_punct_valid_p (unsigned char code)
18131 {
18132 return (code == '*' || code == '+' || code == '&' || code == ';'
18133 || code == '~' || code == '^' || code == '!');
18134 }
18135 \f
18136 /* Print a memory operand whose address is ADDR. */
18137
18138 static void
18139 ix86_print_operand_address_as (FILE *file, rtx addr,
18140 addr_space_t as, bool no_rip)
18141 {
18142 struct ix86_address parts;
18143 rtx base, index, disp;
18144 int scale;
18145 int ok;
18146 bool vsib = false;
18147 int code = 0;
18148
18149 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
18150 {
18151 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18152 gcc_assert (parts.index == NULL_RTX);
18153 parts.index = XVECEXP (addr, 0, 1);
18154 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
18155 addr = XVECEXP (addr, 0, 0);
18156 vsib = true;
18157 }
18158 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
18159 {
18160 gcc_assert (TARGET_64BIT);
18161 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18162 code = 'q';
18163 }
18164 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
18165 {
18166 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
18167 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
18168 if (parts.base != NULL_RTX)
18169 {
18170 parts.index = parts.base;
18171 parts.scale = 1;
18172 }
18173 parts.base = XVECEXP (addr, 0, 0);
18174 addr = XVECEXP (addr, 0, 0);
18175 }
18176 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
18177 {
18178 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18179 gcc_assert (parts.index == NULL_RTX);
18180 parts.index = XVECEXP (addr, 0, 1);
18181 addr = XVECEXP (addr, 0, 0);
18182 }
18183 else
18184 ok = ix86_decompose_address (addr, &parts);
18185
18186 gcc_assert (ok);
18187
18188 base = parts.base;
18189 index = parts.index;
18190 disp = parts.disp;
18191 scale = parts.scale;
18192
18193 if (ADDR_SPACE_GENERIC_P (as))
18194 as = parts.seg;
18195 else
18196 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
18197
18198 if (!ADDR_SPACE_GENERIC_P (as))
18199 {
18200 const char *string;
18201
18202 if (as == ADDR_SPACE_SEG_FS)
18203 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
18204 else if (as == ADDR_SPACE_SEG_GS)
18205 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
18206 else
18207 gcc_unreachable ();
18208 fputs (string, file);
18209 }
18210
18211 /* Use the one-byte-shorter RIP-relative addressing in 64bit mode. */
18212 if (TARGET_64BIT && !base && !index && !no_rip)
18213 {
18214 rtx symbol = disp;
18215
18216 if (GET_CODE (disp) == CONST
18217 && GET_CODE (XEXP (disp, 0)) == PLUS
18218 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18219 symbol = XEXP (XEXP (disp, 0), 0);
18220
18221 if (GET_CODE (symbol) == LABEL_REF
18222 || (GET_CODE (symbol) == SYMBOL_REF
18223 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
18224 base = pc_rtx;
18225 }
18226
18227 if (!base && !index)
18228 {
18229 /* A displacement-only address requires special attention. */
18230 if (CONST_INT_P (disp))
18231 {
18232 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
18233 fputs ("ds:", file);
18234 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
18235 }
18236 /* Load the external function address via the GOT slot to avoid PLT. */
18237 else if (GET_CODE (disp) == CONST
18238 && GET_CODE (XEXP (disp, 0)) == UNSPEC
18239 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
18240 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
18241 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
18242 output_pic_addr_const (file, disp, 0);
18243 else if (flag_pic)
18244 output_pic_addr_const (file, disp, 0);
18245 else
18246 output_addr_const (file, disp);
18247 }
18248 else
18249 {
18250 /* Print SImode register names to force addr32 prefix. */
18251 if (SImode_address_operand (addr, VOIDmode))
18252 {
18253 if (flag_checking)
18254 {
18255 gcc_assert (TARGET_64BIT);
18256 switch (GET_CODE (addr))
18257 {
18258 case SUBREG:
18259 gcc_assert (GET_MODE (addr) == SImode);
18260 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
18261 break;
18262 case ZERO_EXTEND:
18263 case AND:
18264 gcc_assert (GET_MODE (addr) == DImode);
18265 break;
18266 default:
18267 gcc_unreachable ();
18268 }
18269 }
18270 gcc_assert (!code);
18271 code = 'k';
18272 }
18273 else if (code == 0
18274 && TARGET_X32
18275 && disp
18276 && CONST_INT_P (disp)
18277 && INTVAL (disp) < -16*1024*1024)
18278 {
18279 /* X32 runs in 64-bit mode, where displacement, DISP, in
18280 address DISP(%r64), is encoded as 32-bit immediate sign-
18281 extended from 32-bit to 64-bit. For -0x40000300(%r64),
18282 address is %r64 + 0xffffffffbffffd00. When %r64 <
18283 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
18284 which is invalid for x32. The correct address is %r64
18285 - 0x40000300 == 0xf7ffdd64. To properly encode
18286 -0x40000300(%r64) for x32, we zero-extend negative
18287 displacement by forcing addr32 prefix which truncates
18288 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
18289 zero-extend all negative displacements, including -1(%rsp).
18290 However, for small negative displacements, sign-extension
18291 won't cause overflow. We only zero-extend negative
18292 displacements if they are < -16*1024*1024, the same bound that is
18293 used to check legitimate address displacements for PIC. */
18294 code = 'k';
18295 }
18296
18297 /* Since the upper 32 bits of RSP are always zero for x32,
18298 we can encode %esp as %rsp to avoid 0x67 prefix if
18299 there is no index register. */
18300 if (TARGET_X32 && Pmode == SImode
18301 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
18302 code = 'q';
18303
18304 if (ASSEMBLER_DIALECT == ASM_ATT)
18305 {
18306 if (disp)
18307 {
18308 if (flag_pic)
18309 output_pic_addr_const (file, disp, 0);
18310 else if (GET_CODE (disp) == LABEL_REF)
18311 output_asm_label (disp);
18312 else
18313 output_addr_const (file, disp);
18314 }
18315
18316 putc ('(', file);
18317 if (base)
18318 print_reg (base, code, file);
18319 if (index)
18320 {
18321 putc (',', file);
18322 print_reg (index, vsib ? 0 : code, file);
18323 if (scale != 1 || vsib)
18324 fprintf (file, ",%d", scale);
18325 }
18326 putc (')', file);
18327 }
18328 else
18329 {
18330 rtx offset = NULL_RTX;
18331
18332 if (disp)
18333 {
18334 /* Pull out the offset of a symbol; print any symbol itself. */
18335 if (GET_CODE (disp) == CONST
18336 && GET_CODE (XEXP (disp, 0)) == PLUS
18337 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18338 {
18339 offset = XEXP (XEXP (disp, 0), 1);
18340 disp = gen_rtx_CONST (VOIDmode,
18341 XEXP (XEXP (disp, 0), 0));
18342 }
18343
18344 if (flag_pic)
18345 output_pic_addr_const (file, disp, 0);
18346 else if (GET_CODE (disp) == LABEL_REF)
18347 output_asm_label (disp);
18348 else if (CONST_INT_P (disp))
18349 offset = disp;
18350 else
18351 output_addr_const (file, disp);
18352 }
18353
18354 putc ('[', file);
18355 if (base)
18356 {
18357 print_reg (base, code, file);
18358 if (offset)
18359 {
18360 if (INTVAL (offset) >= 0)
18361 putc ('+', file);
18362 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18363 }
18364 }
18365 else if (offset)
18366 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18367 else
18368 putc ('0', file);
18369
18370 if (index)
18371 {
18372 putc ('+', file);
18373 print_reg (index, vsib ? 0 : code, file);
18374 if (scale != 1 || vsib)
18375 fprintf (file, "*%d", scale);
18376 }
18377 putc (']', file);
18378 }
18379 }
18380 }
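/* Worked example (illustrative registers): for base = %ebx, index = %esi,
   scale = 4 and disp = 16, the routine above prints "16(%ebx,%esi,4)" in
   the AT&T dialect and "[ebx+16+esi*4]" in the Intel dialect.  */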
18381
18382 static void
18383 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
18384 {
18385 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
18386 }
18387
18388 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
18389
18390 static bool
18391 i386_asm_output_addr_const_extra (FILE *file, rtx x)
18392 {
18393 rtx op;
18394
18395 if (GET_CODE (x) != UNSPEC)
18396 return false;
18397
18398 op = XVECEXP (x, 0, 0);
18399 switch (XINT (x, 1))
18400 {
18401 case UNSPEC_GOTOFF:
18402 output_addr_const (file, op);
18403 fputs ("@gotoff", file);
18404 break;
18405 case UNSPEC_GOTTPOFF:
18406 output_addr_const (file, op);
18407 /* FIXME: This might be @TPOFF in Sun ld. */
18408 fputs ("@gottpoff", file);
18409 break;
18410 case UNSPEC_TPOFF:
18411 output_addr_const (file, op);
18412 fputs ("@tpoff", file);
18413 break;
18414 case UNSPEC_NTPOFF:
18415 output_addr_const (file, op);
18416 if (TARGET_64BIT)
18417 fputs ("@tpoff", file);
18418 else
18419 fputs ("@ntpoff", file);
18420 break;
18421 case UNSPEC_DTPOFF:
18422 output_addr_const (file, op);
18423 fputs ("@dtpoff", file);
18424 break;
18425 case UNSPEC_GOTNTPOFF:
18426 output_addr_const (file, op);
18427 if (TARGET_64BIT)
18428 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
18429 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
18430 else
18431 fputs ("@gotntpoff", file);
18432 break;
18433 case UNSPEC_INDNTPOFF:
18434 output_addr_const (file, op);
18435 fputs ("@indntpoff", file);
18436 break;
18437 #if TARGET_MACHO
18438 case UNSPEC_MACHOPIC_OFFSET:
18439 output_addr_const (file, op);
18440 putc ('-', file);
18441 machopic_output_function_base_name (file);
18442 break;
18443 #endif
18444
18445 default:
18446 return false;
18447 }
18448
18449 return true;
18450 }
18451 \f
18452 /* Split one or more double-mode RTL references into pairs of half-mode
18453 references. The RTL can be REG, offsettable MEM, integer constant, or
18454 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
18455 split and "num" is its length. lo_half and hi_half are output arrays
18456 that parallel "operands". */
18457
18458 void
18459 split_double_mode (machine_mode mode, rtx operands[],
18460 int num, rtx lo_half[], rtx hi_half[])
18461 {
18462 machine_mode half_mode;
18463 unsigned int byte;
18464
18465 switch (mode)
18466 {
18467 case E_TImode:
18468 half_mode = DImode;
18469 break;
18470 case E_DImode:
18471 half_mode = SImode;
18472 break;
18473 default:
18474 gcc_unreachable ();
18475 }
18476
18477 byte = GET_MODE_SIZE (half_mode);
18478
18479 while (num--)
18480 {
18481 rtx op = operands[num];
18482
18483 /* simplify_subreg refuses to split volatile memory addresses,
18484 but we still have to handle them. */
18485 if (MEM_P (op))
18486 {
18487 lo_half[num] = adjust_address (op, half_mode, 0);
18488 hi_half[num] = adjust_address (op, half_mode, byte);
18489 }
18490 else
18491 {
18492 lo_half[num] = simplify_gen_subreg (half_mode, op,
18493 GET_MODE (op) == VOIDmode
18494 ? mode : GET_MODE (op), 0);
18495 hi_half[num] = simplify_gen_subreg (half_mode, op,
18496 GET_MODE (op) == VOIDmode
18497 ? mode : GET_MODE (op), byte);
18498 }
18499 }
18500 }
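/* Example of the splitting above (illustrative RTL): a DImode memory
   operand (mem:DI (reg:SI bx)) is split into lo_half = (mem:SI (reg:SI bx))
   and hi_half = the same address offset by 4 bytes, while a DImode
   register or constant is split with simplify_gen_subreg.  */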
18501 \f
18502 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
18503 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
18504 is the expression of the binary operation. The output may either be
18505 emitted here, or returned to the caller, like all output_* functions.
18506
18507 There is no guarantee that the operands are the same mode, as they
18508 might be within FLOAT or FLOAT_EXTEND expressions. */
18509
18510 #ifndef SYSV386_COMPAT
18511 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
18512 wants to fix the assemblers because that causes incompatibility
18513 with gcc. No-one wants to fix gcc because that causes
18514 incompatibility with assemblers... You can use the option of
18515 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
18516 #define SYSV386_COMPAT 1
18517 #endif
18518
18519 const char *
18520 output_387_binary_op (rtx_insn *insn, rtx *operands)
18521 {
18522 static char buf[40];
18523 const char *p;
18524 bool is_sse
18525 = (SSE_REG_P (operands[0])
18526 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
18527
18528 if (is_sse)
18529 p = "%v";
18530 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18531 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18532 p = "fi";
18533 else
18534 p = "f";
18535
18536 strcpy (buf, p);
18537
18538 switch (GET_CODE (operands[3]))
18539 {
18540 case PLUS:
18541 p = "add"; break;
18542 case MINUS:
18543 p = "sub"; break;
18544 case MULT:
18545 p = "mul"; break;
18546 case DIV:
18547 p = "div"; break;
18548 default:
18549 gcc_unreachable ();
18550 }
18551
18552 strcat (buf, p);
18553
18554 if (is_sse)
18555 {
18556 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
18557 strcat (buf, p);
18558
18559 if (TARGET_AVX)
18560 p = "\t{%2, %1, %0|%0, %1, %2}";
18561 else
18562 p = "\t{%2, %0|%0, %2}";
18563
18564 strcat (buf, p);
18565 return buf;
18566 }
18567
18568 /* Even if we do not want to check the inputs, this documents the
18569 input constraints, which helps in understanding the code below. */
18570 if (flag_checking)
18571 {
18572 if (STACK_REG_P (operands[0])
18573 && ((REG_P (operands[1])
18574 && REGNO (operands[0]) == REGNO (operands[1])
18575 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
18576 || (REG_P (operands[2])
18577 && REGNO (operands[0]) == REGNO (operands[2])
18578 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
18579 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
18580 ; /* ok */
18581 else
18582 gcc_unreachable ();
18583 }
18584
18585 switch (GET_CODE (operands[3]))
18586 {
18587 case MULT:
18588 case PLUS:
18589 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
18590 std::swap (operands[1], operands[2]);
18591
18592 /* At this point operands[0] == operands[1]. */
18593
18594 if (MEM_P (operands[2]))
18595 {
18596 p = "%Z2\t%2";
18597 break;
18598 }
18599
18600 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18601 {
18602 if (STACK_TOP_P (operands[0]))
18603 /* How is it that we are storing to a dead operand[2]?
18604 Well, presumably operands[1] is dead too. We can't
18605 store the result to st(0) as st(0) gets popped on this
18606 instruction. Instead store to operands[2] (which I
18607 think has to be st(1)). st(1) will be popped later.
18608 gcc <= 2.8.1 didn't have this check and generated
18609 assembly code that the Unixware assembler rejected. */
18610 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18611 else
18612 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18613 break;
18614 }
18615
18616 if (STACK_TOP_P (operands[0]))
18617 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18618 else
18619 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18620 break;
18621
18622 case MINUS:
18623 case DIV:
18624 if (MEM_P (operands[1]))
18625 {
18626 p = "r%Z1\t%1";
18627 break;
18628 }
18629
18630 if (MEM_P (operands[2]))
18631 {
18632 p = "%Z2\t%2";
18633 break;
18634 }
18635
18636 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18637 {
18638 #if SYSV386_COMPAT
18639 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
18640 derived assemblers, confusingly reverse the direction of
18641 the operation for fsub{r} and fdiv{r} when the
18642 destination register is not st(0). The Intel assembler
18643 doesn't have this brain damage. Read !SYSV386_COMPAT to
18644 figure out what the hardware really does. */
18645 if (STACK_TOP_P (operands[0]))
18646 p = "{p\t%0, %2|rp\t%2, %0}";
18647 else
18648 p = "{rp\t%2, %0|p\t%0, %2}";
18649 #else
18650 if (STACK_TOP_P (operands[0]))
18651 /* As above for fmul/fadd, we can't store to st(0). */
18652 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18653 else
18654 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18655 #endif
18656 break;
18657 }
18658
18659 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18660 {
18661 #if SYSV386_COMPAT
18662 if (STACK_TOP_P (operands[0]))
18663 p = "{rp\t%0, %1|p\t%1, %0}";
18664 else
18665 p = "{p\t%1, %0|rp\t%0, %1}";
18666 #else
18667 if (STACK_TOP_P (operands[0]))
18668 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
18669 else
18670 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
18671 #endif
18672 break;
18673 }
18674
18675 if (STACK_TOP_P (operands[0]))
18676 {
18677 if (STACK_TOP_P (operands[1]))
18678 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18679 else
18680 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
18681 break;
18682 }
18683 else if (STACK_TOP_P (operands[1]))
18684 {
18685 #if SYSV386_COMPAT
18686 p = "{\t%1, %0|r\t%0, %1}";
18687 #else
18688 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
18689 #endif
18690 }
18691 else
18692 {
18693 #if SYSV386_COMPAT
18694 p = "{r\t%2, %0|\t%0, %2}";
18695 #else
18696 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18697 #endif
18698 }
18699 break;
18700
18701 default:
18702 gcc_unreachable ();
18703 }
18704
18705 strcat (buf, p);
18706 return buf;
18707 }
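/* Rough example of the templates built above: for a PLUS in SFmode with
   operands[2] in memory the buffer becomes "fadd%Z2\t%2" (the %Z suffix
   expands to the operand-size letter), while the SSE path with AVX
   enabled yields "%vaddss\t{%2, %1, %0|%0, %1, %2}".  The exact template
   depends on which operands are stack registers, memory or SSE regs.  */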
18708
18709 /* Return needed mode for entity in optimize_mode_switching pass. */
18710
18711 static int
18712 ix86_dirflag_mode_needed (rtx_insn *insn)
18713 {
18714 if (CALL_P (insn))
18715 {
18716 if (cfun->machine->func_type == TYPE_NORMAL)
18717 return X86_DIRFLAG_ANY;
18718 else
18719 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
18720 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
18721 }
18722
18723 if (recog_memoized (insn) < 0)
18724 return X86_DIRFLAG_ANY;
18725
18726 if (get_attr_type (insn) == TYPE_STR)
18727 {
18728 /* Emit cld instruction if stringops are used in the function. */
18729 if (cfun->machine->func_type == TYPE_NORMAL)
18730 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
18731 else
18732 return X86_DIRFLAG_RESET;
18733 }
18734
18735 return X86_DIRFLAG_ANY;
18736 }
18737
18738 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP. */
18739
18740 static bool
18741 ix86_check_avx_upper_register (const_rtx exp)
18742 {
18743 if (SUBREG_P (exp))
18744 exp = SUBREG_REG (exp);
18745
18746 return (REG_P (exp)
18747 && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))
18748 || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp))));
18749 }
18750
18751 /* Return needed mode for entity in optimize_mode_switching pass. */
18752
18753 static int
18754 ix86_avx_u128_mode_needed (rtx_insn *insn)
18755 {
18756 if (CALL_P (insn))
18757 {
18758 rtx link;
18759
18760 /* Needed mode is set to AVX_U128_CLEAN if there are
18761 no 256bit or 512bit modes used in function arguments. */
18762 for (link = CALL_INSN_FUNCTION_USAGE (insn);
18763 link;
18764 link = XEXP (link, 1))
18765 {
18766 if (GET_CODE (XEXP (link, 0)) == USE)
18767 {
18768 rtx arg = XEXP (XEXP (link, 0), 0);
18769
18770 if (ix86_check_avx_upper_register (arg))
18771 return AVX_U128_DIRTY;
18772 }
18773 }
18774
18775 return AVX_U128_CLEAN;
18776 }
18777
18778 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
18779 The hardware changes state only when a 256bit register is written to,
18780 but we need to prevent the compiler from moving the optimal insertion
18781 point above an eventual read from a 256bit or 512bit register. */
18782 subrtx_iterator::array_type array;
18783 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
18784 if (ix86_check_avx_upper_register (*iter))
18785 return AVX_U128_DIRTY;
18786
18787 return AVX_U128_ANY;
18788 }
18789
18790 /* Return mode that i387 must be switched into
18791 prior to the execution of insn. */
18792
18793 static int
18794 ix86_i387_mode_needed (int entity, rtx_insn *insn)
18795 {
18796 enum attr_i387_cw mode;
18797
18798 /* The mode UNINITIALIZED is used to store the control word after a
18799 function call or ASM pattern. The mode ANY specifies that the function
18800 has no requirements on the control word and makes no changes to the
18801 bits we are interested in. */
18802
18803 if (CALL_P (insn)
18804 || (NONJUMP_INSN_P (insn)
18805 && (asm_noperands (PATTERN (insn)) >= 0
18806 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
18807 return I387_CW_UNINITIALIZED;
18808
18809 if (recog_memoized (insn) < 0)
18810 return I387_CW_ANY;
18811
18812 mode = get_attr_i387_cw (insn);
18813
18814 switch (entity)
18815 {
18816 case I387_TRUNC:
18817 if (mode == I387_CW_TRUNC)
18818 return mode;
18819 break;
18820
18821 case I387_FLOOR:
18822 if (mode == I387_CW_FLOOR)
18823 return mode;
18824 break;
18825
18826 case I387_CEIL:
18827 if (mode == I387_CW_CEIL)
18828 return mode;
18829 break;
18830
18831 case I387_MASK_PM:
18832 if (mode == I387_CW_MASK_PM)
18833 return mode;
18834 break;
18835
18836 default:
18837 gcc_unreachable ();
18838 }
18839
18840 return I387_CW_ANY;
18841 }
18842
18843 /* Return mode that entity must be switched into
18844 prior to the execution of insn. */
18845
18846 static int
18847 ix86_mode_needed (int entity, rtx_insn *insn)
18848 {
18849 switch (entity)
18850 {
18851 case X86_DIRFLAG:
18852 return ix86_dirflag_mode_needed (insn);
18853 case AVX_U128:
18854 return ix86_avx_u128_mode_needed (insn);
18855 case I387_TRUNC:
18856 case I387_FLOOR:
18857 case I387_CEIL:
18858 case I387_MASK_PM:
18859 return ix86_i387_mode_needed (entity, insn);
18860 default:
18861 gcc_unreachable ();
18862 }
18863 return 0;
18864 }
18865
18866 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
18867
18868 static void
18869 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
18870 {
18871 if (ix86_check_avx_upper_register (dest))
18872 {
18873 bool *used = (bool *) data;
18874 *used = true;
18875 }
18876 }
18877
18878 /* Calculate mode of upper 128bit AVX registers after the insn. */
18879
18880 static int
18881 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18882 {
18883 rtx pat = PATTERN (insn);
18884
18885 if (vzeroupper_operation (pat, VOIDmode)
18886 || vzeroall_operation (pat, VOIDmode))
18887 return AVX_U128_CLEAN;
18888
18889 /* We know that the state is clean after a CALL insn if no 256bit
18890 or 512bit registers are used for the function return value. */
18891 if (CALL_P (insn))
18892 {
18893 bool avx_upper_reg_found = false;
18894 note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
18895
18896 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18897 }
18898
18899 /* Otherwise, return current mode. Remember that if insn
18900 references AVX 256bit or 512bit registers, the mode was already
18901 changed to DIRTY from MODE_NEEDED. */
18902 return mode;
18903 }
18904
18905 /* Return the mode that an insn results in. */
18906
18907 static int
18908 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18909 {
18910 switch (entity)
18911 {
18912 case X86_DIRFLAG:
18913 return mode;
18914 case AVX_U128:
18915 return ix86_avx_u128_mode_after (mode, insn);
18916 case I387_TRUNC:
18917 case I387_FLOOR:
18918 case I387_CEIL:
18919 case I387_MASK_PM:
18920 return mode;
18921 default:
18922 gcc_unreachable ();
18923 }
18924 }
18925
18926 static int
18927 ix86_dirflag_mode_entry (void)
18928 {
18929 /* With TARGET_CLD, or in an interrupt handler, we can't assume
18930 the direction flag state at function entry. */
18931 if (TARGET_CLD
18932 || cfun->machine->func_type != TYPE_NORMAL)
18933 return X86_DIRFLAG_ANY;
18934
18935 return X86_DIRFLAG_RESET;
18936 }
18937
18938 static int
18939 ix86_avx_u128_mode_entry (void)
18940 {
18941 tree arg;
18942
18943 /* Entry mode is set to AVX_U128_DIRTY if there are
18944 256bit or 512bit modes used in function arguments. */
18945 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18946 arg = TREE_CHAIN (arg))
18947 {
18948 rtx incoming = DECL_INCOMING_RTL (arg);
18949
18950 if (incoming && ix86_check_avx_upper_register (incoming))
18951 return AVX_U128_DIRTY;
18952 }
18953
18954 return AVX_U128_CLEAN;
18955 }
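/* Illustrative example (assuming the usual SysV vector-argument passing):
   for a function such as

     __m256d f (__m256d x) { ... }

   the incoming RTL for X lives in a 256-bit register, so the entry mode
   above is AVX_U128_DIRTY and no clean upper-half state is assumed.  */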
18956
18957 /* Return a mode that ENTITY is assumed to be
18958 switched to at function entry. */
18959
18960 static int
18961 ix86_mode_entry (int entity)
18962 {
18963 switch (entity)
18964 {
18965 case X86_DIRFLAG:
18966 return ix86_dirflag_mode_entry ();
18967 case AVX_U128:
18968 return ix86_avx_u128_mode_entry ();
18969 case I387_TRUNC:
18970 case I387_FLOOR:
18971 case I387_CEIL:
18972 case I387_MASK_PM:
18973 return I387_CW_ANY;
18974 default:
18975 gcc_unreachable ();
18976 }
18977 }
18978
18979 static int
18980 ix86_avx_u128_mode_exit (void)
18981 {
18982 rtx reg = crtl->return_rtx;
18983
18984 /* Exit mode is set to AVX_U128_DIRTY if 256bit or 512bit modes
18985 are used in the function return register. */
18986 if (reg && ix86_check_avx_upper_register (reg))
18987 return AVX_U128_DIRTY;
18988
18989 return AVX_U128_CLEAN;
18990 }
18991
18992 /* Return a mode that ENTITY is assumed to be
18993 switched to at function exit. */
18994
18995 static int
18996 ix86_mode_exit (int entity)
18997 {
18998 switch (entity)
18999 {
19000 case X86_DIRFLAG:
19001 return X86_DIRFLAG_ANY;
19002 case AVX_U128:
19003 return ix86_avx_u128_mode_exit ();
19004 case I387_TRUNC:
19005 case I387_FLOOR:
19006 case I387_CEIL:
19007 case I387_MASK_PM:
19008 return I387_CW_ANY;
19009 default:
19010 gcc_unreachable ();
19011 }
19012 }
19013
19014 static int
19015 ix86_mode_priority (int, int n)
19016 {
19017 return n;
19018 }
19019
19020 /* Output code to initialize the control word copies used by the
19021 trunc?f?i and rounding patterns. A copy of the current control word
19022 is adjusted for rounding mode MODE and saved in the stack slot for MODE. */
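/* Background for the constants used below: in the x87 control word the
   rounding-control field is bits 10-11 (00 = to nearest, 01 = down,
   10 = up, 11 = truncate), so 0x0400, 0x0800 and 0x0c00 select the
   respective modes, and bit 5 (0x0020) masks the precision exception.  */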
19023
19024 static void
19025 emit_i387_cw_initialization (int mode)
19026 {
19027 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
19028 rtx new_mode;
19029
19030 enum ix86_stack_slot slot;
19031
19032 rtx reg = gen_reg_rtx (HImode);
19033
19034 emit_insn (gen_x86_fnstcw_1 (stored_mode));
19035 emit_move_insn (reg, copy_rtx (stored_mode));
19036
19037 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
19038 || optimize_insn_for_size_p ())
19039 {
19040 switch (mode)
19041 {
19042 case I387_CW_TRUNC:
19043 /* round toward zero (truncate) */
19044 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
19045 slot = SLOT_CW_TRUNC;
19046 break;
19047
19048 case I387_CW_FLOOR:
19049 /* round down toward -oo */
19050 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19051 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
19052 slot = SLOT_CW_FLOOR;
19053 break;
19054
19055 case I387_CW_CEIL:
19056 /* round up toward +oo */
19057 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19058 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
19059 slot = SLOT_CW_CEIL;
19060 break;
19061
19062 case I387_CW_MASK_PM:
19063 /* mask precision exception for nearbyint() */
19064 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19065 slot = SLOT_CW_MASK_PM;
19066 break;
19067
19068 default:
19069 gcc_unreachable ();
19070 }
19071 }
19072 else
19073 {
19074 switch (mode)
19075 {
19076 case I387_CW_TRUNC:
19077 /* round toward zero (truncate) */
19078 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
19079 slot = SLOT_CW_TRUNC;
19080 break;
19081
19082 case I387_CW_FLOOR:
19083 /* round down toward -oo */
19084 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
19085 slot = SLOT_CW_FLOOR;
19086 break;
19087
19088 case I387_CW_CEIL:
19089 /* round up toward +oo */
19090 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
19091 slot = SLOT_CW_CEIL;
19092 break;
19093
19094 case I387_CW_MASK_PM:
19095 /* mask precision exception for nearbyint() */
19096 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19097 slot = SLOT_CW_MASK_PM;
19098 break;
19099
19100 default:
19101 gcc_unreachable ();
19102 }
19103 }
19104
19105 gcc_assert (slot < MAX_386_STACK_LOCALS);
19106
19107 new_mode = assign_386_stack_local (HImode, slot);
19108 emit_move_insn (new_mode, reg);
19109 }
19110
19111 /* Emit vzeroupper. */
19112
19113 void
19114 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
19115 {
19116 int i;
19117
19118 /* Cancel automatic vzeroupper insertion if there are
19119 live call-saved SSE registers at the insertion point. */
19120
19121 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19122 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19123 return;
19124
19125 if (TARGET_64BIT)
19126 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19127 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19128 return;
19129
19130 emit_insn (gen_avx_vzeroupper ());
19131 }
19132
19135 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
19136 is the set of hard registers live at the point where the insn(s)
19137 are to be inserted. */
19138
19139 static void
19140 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
19141 HARD_REG_SET regs_live)
19142 {
19143 switch (entity)
19144 {
19145 case X86_DIRFLAG:
19146 if (mode == X86_DIRFLAG_RESET)
19147 emit_insn (gen_cld ());
19148 break;
19149 case AVX_U128:
19150 if (mode == AVX_U128_CLEAN)
19151 ix86_avx_emit_vzeroupper (regs_live);
19152 break;
19153 case I387_TRUNC:
19154 case I387_FLOOR:
19155 case I387_CEIL:
19156 case I387_MASK_PM:
19157 if (mode != I387_CW_ANY
19158 && mode != I387_CW_UNINITIALIZED)
19159 emit_i387_cw_initialization (mode);
19160 break;
19161 default:
19162 gcc_unreachable ();
19163 }
19164 }
19165
19166 /* Output code for INSN to convert a float to a signed int. OPERANDS
19167 are the insn operands. The output may be [HSD]Imode and the input
19168 operand may be [SDX]Fmode. */
19169
19170 const char *
19171 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
19172 {
19173 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19174 bool dimode_p = GET_MODE (operands[0]) == DImode;
19175 int round_mode = get_attr_i387_cw (insn);
19176
19177 static char buf[40];
19178 const char *p;
19179
19180 /* Jump through a hoop or two for DImode, since the hardware has no
19181 non-popping instruction. We used to do this a different way, but
19182 that was somewhat fragile and broke with post-reload splitters. */
19183 if ((dimode_p || fisttp) && !stack_top_dies)
19184 output_asm_insn ("fld\t%y1", operands);
19185
19186 gcc_assert (STACK_TOP_P (operands[1]));
19187 gcc_assert (MEM_P (operands[0]));
19188 gcc_assert (GET_MODE (operands[1]) != TFmode);
19189
19190 if (fisttp)
19191 return "fisttp%Z0\t%0";
19192
19193 strcpy (buf, "fist");
19194
19195 if (round_mode != I387_CW_ANY)
19196 output_asm_insn ("fldcw\t%3", operands);
19197
19198 p = "p%Z0\t%0";
19199 strcat (buf, p + !(stack_top_dies || dimode_p));
19200
19201 output_asm_insn (buf, operands);
19202
19203 if (round_mode != I387_CW_ANY)
19204 output_asm_insn ("fldcw\t%2", operands);
19205
19206 return "";
19207 }
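/* Illustrative output: for a DImode result with a non-default rounding
   mode the routine above emits roughly "fldcw %3" (switch to the
   truncating control word), then "fistp%Z0\t%0" (a popping store, since
   DImode has no non-popping form), then "fldcw %2" to restore the
   original control word.  */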
19208
19209 /* Output code for x87 ffreep insn. The OPNO argument, which may only
19210 have the values zero or one, indicates the ffreep insn's operand
19211 from the OPERANDS array. */
19212
19213 static const char *
19214 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
19215 {
19216 if (TARGET_USE_FFREEP)
19217 #ifdef HAVE_AS_IX86_FFREEP
19218 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
19219 #else
19220 {
19221 static char retval[32];
19222 int regno = REGNO (operands[opno]);
19223
19224 gcc_assert (STACK_REGNO_P (regno));
19225
19226 regno -= FIRST_STACK_REG;
19227
19228 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
19229 return retval;
19230 }
19231 #endif
19232
19233 return opno ? "fstp\t%y1" : "fstp\t%y0";
19234 }
19235
19236
19237 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
19238 should be used. UNORDERED_P is true when fucom should be used. */
19239
19240 const char *
19241 output_fp_compare (rtx_insn *insn, rtx *operands,
19242 bool eflags_p, bool unordered_p)
19243 {
19244 rtx *xops = eflags_p ? &operands[0] : &operands[1];
19245 bool stack_top_dies;
19246
19247 static char buf[40];
19248 const char *p;
19249
19250 gcc_assert (STACK_TOP_P (xops[0]));
19251
19252 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19253
19254 if (eflags_p)
19255 {
19256 p = unordered_p ? "fucomi" : "fcomi";
19257 strcpy (buf, p);
19258
19259 p = "p\t{%y1, %0|%0, %y1}";
19260 strcat (buf, p + !stack_top_dies);
19261
19262 return buf;
19263 }
19264
19265 if (STACK_REG_P (xops[1])
19266 && stack_top_dies
19267 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
19268 {
19269 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
19270
19271 /* If both the top of the 387 stack and the other operand (itself
19272 a stack register) die, then this must be a `fcompp' float
19273 compare. */
19274 p = unordered_p ? "fucompp" : "fcompp";
19275 strcpy (buf, p);
19276 }
19277 else if (const0_operand (xops[1], VOIDmode))
19278 {
19279 gcc_assert (!unordered_p);
19280 strcpy (buf, "ftst");
19281 }
19282 else
19283 {
19284 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
19285 {
19286 gcc_assert (!unordered_p);
19287 p = "ficom";
19288 }
19289 else
19290 p = unordered_p ? "fucom" : "fcom";
19291
19292 strcpy (buf, p);
19293
19294 p = "p%Z2\t%y2";
19295 strcat (buf, p + !stack_top_dies);
19296 }
19297
19298 output_asm_insn (buf, operands);
19299 return "fnstsw\t%0";
19300 }
19301
19302 void
19303 ix86_output_addr_vec_elt (FILE *file, int value)
19304 {
19305 const char *directive = ASM_LONG;
19306
19307 #ifdef ASM_QUAD
19308 if (TARGET_LP64)
19309 directive = ASM_QUAD;
19310 #else
19311 gcc_assert (!TARGET_64BIT);
19312 #endif
19313
19314 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
19315 }
19316
19317 void
19318 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
19319 {
19320 const char *directive = ASM_LONG;
19321
19322 #ifdef ASM_QUAD
19323 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
19324 directive = ASM_QUAD;
19325 #else
19326 gcc_assert (!TARGET_64BIT);
19327 #endif
19328 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
19329 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19330 fprintf (file, "%s%s%d-%s%d\n",
19331 directive, LPREFIX, value, LPREFIX, rel);
19332 else if (HAVE_AS_GOTOFF_IN_DATA)
19333 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
19334 #if TARGET_MACHO
19335 else if (TARGET_MACHO)
19336 {
19337 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
19338 machopic_output_function_base_name (file);
19339 putc ('\n', file);
19340 }
19341 #endif
19342 else
19343 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
19344 GOT_SYMBOL_NAME, LPREFIX, value);
19345 }
19346 \f
19347 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
19348 for the target. */
19349
19350 void
19351 ix86_expand_clear (rtx dest)
19352 {
19353 rtx tmp;
19354
19355 /* We play register width games, which are only valid after reload. */
19356 gcc_assert (reload_completed);
19357
19358 /* Avoid HImode and its attendant prefix byte. */
19359 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
19360 dest = gen_rtx_REG (SImode, REGNO (dest));
19361 tmp = gen_rtx_SET (dest, const0_rtx);
19362
19363 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
19364 {
19365 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19366 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
19367 }
19368
19369 emit_insn (tmp);
19370 }
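/* Illustrative result: when TARGET_USE_MOV0 is unset or we optimize for
   size, clearing %eax emits "xorl %eax, %eax" together with a FLAGS_REG
   clobber; otherwise the plain "movl $0, %eax" form is kept.  */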
19371
19372 void
19373 ix86_expand_move (machine_mode mode, rtx operands[])
19374 {
19375 rtx op0, op1;
19376 rtx tmp, addend = NULL_RTX;
19377 enum tls_model model;
19378
19379 op0 = operands[0];
19380 op1 = operands[1];
19381
19382 switch (GET_CODE (op1))
19383 {
19384 case CONST:
19385 tmp = XEXP (op1, 0);
19386
19387 if (GET_CODE (tmp) != PLUS
19388 || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
19389 break;
19390
19391 op1 = XEXP (tmp, 0);
19392 addend = XEXP (tmp, 1);
19393 /* FALLTHRU */
19394
19395 case SYMBOL_REF:
19396 model = SYMBOL_REF_TLS_MODEL (op1);
19397
19398 if (model)
19399 op1 = legitimize_tls_address (op1, model, true);
19400 else if (ix86_force_load_from_GOT_p (op1))
19401 {
19402 /* Load the external function address via GOT slot to avoid PLT. */
19403 op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
19404 (TARGET_64BIT
19405 ? UNSPEC_GOTPCREL
19406 : UNSPEC_GOT));
19407 op1 = gen_rtx_CONST (Pmode, op1);
19408 op1 = gen_const_mem (Pmode, op1);
19409 set_mem_alias_set (op1, ix86_GOT_alias_set ());
19410 }
19411 else
19412 {
19413 tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
19414 if (tmp)
19415 {
19416 op1 = tmp;
19417 if (!addend)
19418 break;
19419 }
19420 else
19421 {
19422 op1 = operands[1];
19423 break;
19424 }
19425 }
19426
19427 if (addend)
19428 {
19429 op1 = force_operand (op1, NULL_RTX);
19430 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
19431 op0, 1, OPTAB_DIRECT);
19432 }
19433 else
19434 op1 = force_operand (op1, op0);
19435
19436 if (op1 == op0)
19437 return;
19438
19439 op1 = convert_to_mode (mode, op1, 1);
19440
19441 default:
19442 break;
19443 }
19444
19445 if ((flag_pic || MACHOPIC_INDIRECT)
19446 && symbolic_operand (op1, mode))
19447 {
19448 if (TARGET_MACHO && !TARGET_64BIT)
19449 {
19450 #if TARGET_MACHO
19451 /* dynamic-no-pic */
19452 if (MACHOPIC_INDIRECT)
19453 {
19454 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
19455 ? op0 : gen_reg_rtx (Pmode);
19456 op1 = machopic_indirect_data_reference (op1, temp);
19457 if (MACHOPIC_PURE)
19458 op1 = machopic_legitimize_pic_address (op1, mode,
19459 temp == op1 ? 0 : temp);
19460 }
19461 if (op0 != op1 && GET_CODE (op0) != MEM)
19462 {
19463 rtx insn = gen_rtx_SET (op0, op1);
19464 emit_insn (insn);
19465 return;
19466 }
19467 if (GET_CODE (op0) == MEM)
19468 op1 = force_reg (Pmode, op1);
19469 else
19470 {
19471 rtx temp = op0;
19472 if (GET_CODE (temp) != REG)
19473 temp = gen_reg_rtx (Pmode);
19474 temp = legitimize_pic_address (op1, temp);
19475 if (temp == op0)
19476 return;
19477 op1 = temp;
19478 }
19479 /* dynamic-no-pic */
19480 #endif
19481 }
19482 else
19483 {
19484 if (MEM_P (op0))
19485 op1 = force_reg (mode, op1);
19486 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
19487 {
19488 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
19489 op1 = legitimize_pic_address (op1, reg);
19490 if (op0 == op1)
19491 return;
19492 op1 = convert_to_mode (mode, op1, 1);
19493 }
19494 }
19495 }
19496 else
19497 {
19498 if (MEM_P (op0)
19499 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
19500 || !push_operand (op0, mode))
19501 && MEM_P (op1))
19502 op1 = force_reg (mode, op1);
19503
19504 if (push_operand (op0, mode)
19505 && ! general_no_elim_operand (op1, mode))
19506 op1 = copy_to_mode_reg (mode, op1);
19507
19508 /* Force large constants in 64bit compilation into register
19509 to get them CSEed. */
19510 if (can_create_pseudo_p ()
19511 && (mode == DImode) && TARGET_64BIT
19512 && immediate_operand (op1, mode)
19513 && !x86_64_zext_immediate_operand (op1, VOIDmode)
19514 && !register_operand (op0, mode)
19515 && optimize)
19516 op1 = copy_to_mode_reg (mode, op1);
19517
19518 if (can_create_pseudo_p ()
19519 && CONST_DOUBLE_P (op1))
19520 {
19521 /* If we are loading a floating point constant to a register,
19522 force the value to memory now, since we'll get better code
19523 out the back end. */
19524
19525 op1 = validize_mem (force_const_mem (mode, op1));
19526 if (!register_operand (op0, mode))
19527 {
19528 rtx temp = gen_reg_rtx (mode);
19529 emit_insn (gen_rtx_SET (temp, op1));
19530 emit_move_insn (op0, temp);
19531 return;
19532 }
19533 }
19534 }
19535
19536 emit_insn (gen_rtx_SET (op0, op1));
19537 }
19538
19539 void
19540 ix86_expand_vector_move (machine_mode mode, rtx operands[])
19541 {
19542 rtx op0 = operands[0], op1 = operands[1];
19543 /* For the IA MCU psABI, use GET_MODE_BITSIZE instead of
19544 GET_MODE_ALIGNMENT, since the biggest alignment there is only 4 bytes. */
19545 unsigned int align = (TARGET_IAMCU
19546 ? GET_MODE_BITSIZE (mode)
19547 : GET_MODE_ALIGNMENT (mode));
19548
19549 if (push_operand (op0, VOIDmode))
19550 op0 = emit_move_resolve_push (mode, op0);
19551
19552 /* Force constants other than zero into memory. We do not know how
19553 the instructions used to build constants modify the upper 64 bits
19554 of the register; once we have that information, we may be able
19555 to handle some of them more efficiently. */
19556 if (can_create_pseudo_p ()
19557 && (CONSTANT_P (op1)
19558 || (SUBREG_P (op1)
19559 && CONSTANT_P (SUBREG_REG (op1))))
19560 && ((register_operand (op0, mode)
19561 && !standard_sse_constant_p (op1, mode))
19562 /* ix86_expand_vector_move_misalign() does not like constants. */
19563 || (SSE_REG_MODE_P (mode)
19564 && MEM_P (op0)
19565 && MEM_ALIGN (op0) < align)))
19566 {
19567 if (SUBREG_P (op1))
19568 {
19569 machine_mode imode = GET_MODE (SUBREG_REG (op1));
19570 rtx r = force_const_mem (imode, SUBREG_REG (op1));
19571 if (r)
19572 r = validize_mem (r);
19573 else
19574 r = force_reg (imode, SUBREG_REG (op1));
19575 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
19576 }
19577 else
19578 op1 = validize_mem (force_const_mem (mode, op1));
19579 }
19580
19581 /* We need to check memory alignment for SSE modes since attributes
19582 can make operands unaligned. */
19583 if (can_create_pseudo_p ()
19584 && SSE_REG_MODE_P (mode)
19585 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
19586 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
19587 {
19588 rtx tmp[2];
19589
19590 /* ix86_expand_vector_move_misalign() does not like both
19591 arguments in memory. */
19592 if (!register_operand (op0, mode)
19593 && !register_operand (op1, mode))
19594 op1 = force_reg (mode, op1);
19595
19596 tmp[0] = op0; tmp[1] = op1;
19597 ix86_expand_vector_move_misalign (mode, tmp);
19598 return;
19599 }
19600
19601 /* Make operand1 a register if it isn't already. */
19602 if (can_create_pseudo_p ()
19603 && !register_operand (op0, mode)
19604 && !register_operand (op1, mode))
19605 {
19606 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
19607 return;
19608 }
19609
19610 emit_insn (gen_rtx_SET (op0, op1));
19611 }
19612
19613 /* Split 32-byte AVX unaligned load and store if needed. */
19614
19615 static void
19616 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
19617 {
19618 rtx m;
19619 rtx (*extract) (rtx, rtx, rtx);
19620 machine_mode mode;
19621
19622 if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
19623 || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
19624 {
19625 emit_insn (gen_rtx_SET (op0, op1));
19626 return;
19627 }
19628
19629 rtx orig_op0 = NULL_RTX;
19630 mode = GET_MODE (op0);
19631 switch (GET_MODE_CLASS (mode))
19632 {
19633 case MODE_VECTOR_INT:
19634 case MODE_INT:
19635 if (mode != V32QImode)
19636 {
19637 if (!MEM_P (op0))
19638 {
19639 orig_op0 = op0;
19640 op0 = gen_reg_rtx (V32QImode);
19641 }
19642 else
19643 op0 = gen_lowpart (V32QImode, op0);
19644 op1 = gen_lowpart (V32QImode, op1);
19645 mode = V32QImode;
19646 }
19647 break;
19648 case MODE_VECTOR_FLOAT:
19649 break;
19650 default:
19651 gcc_unreachable ();
19652 }
19653
19654 switch (mode)
19655 {
19656 default:
19657 gcc_unreachable ();
19658 case E_V32QImode:
19659 extract = gen_avx_vextractf128v32qi;
19660 mode = V16QImode;
19661 break;
19662 case E_V8SFmode:
19663 extract = gen_avx_vextractf128v8sf;
19664 mode = V4SFmode;
19665 break;
19666 case E_V4DFmode:
19667 extract = gen_avx_vextractf128v4df;
19668 mode = V2DFmode;
19669 break;
19670 }
19671
19672 if (MEM_P (op1))
19673 {
19674 rtx r = gen_reg_rtx (mode);
19675 m = adjust_address (op1, mode, 0);
19676 emit_move_insn (r, m);
19677 m = adjust_address (op1, mode, 16);
19678 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
19679 emit_move_insn (op0, r);
19680 }
19681 else if (MEM_P (op0))
19682 {
19683 m = adjust_address (op0, mode, 0);
19684 emit_insn (extract (m, op1, const0_rtx));
19685 m = adjust_address (op0, mode, 16);
19686 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
19687 }
19688 else
19689 gcc_unreachable ();
19690
19691 if (orig_op0)
19692 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19693 }
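/* Illustrative expansion: an unaligned 32-byte load is split into a
   16-byte load of the low half followed by a VEC_CONCAT with the high
   half (typically assembling to vmovups plus vinsertf128), and an
   unaligned 32-byte store becomes two vextractf128 stores.  */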
19694
19695 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
19696 straight to ix86_expand_vector_move. */
19697 /* Code generation for scalar reg-reg moves of single and double precision data:
19698 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
19699 movaps reg, reg
19700 else
19701 movss reg, reg
19702 if (x86_sse_partial_reg_dependency == true)
19703 movapd reg, reg
19704 else
19705 movsd reg, reg
19706
19707 Code generation for scalar loads of double precision data:
19708 if (x86_sse_split_regs == true)
19709 movlpd mem, reg (gas syntax)
19710 else
19711 movsd mem, reg
19712
19713 Code generation for unaligned packed loads of single precision data
19714 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
19715 if (x86_sse_unaligned_move_optimal)
19716 movups mem, reg
19717
19718 if (x86_sse_partial_reg_dependency == true)
19719 {
19720 xorps reg, reg
19721 movlps mem, reg
19722 movhps mem+8, reg
19723 }
19724 else
19725 {
19726 movlps mem, reg
19727 movhps mem+8, reg
19728 }
19729
19730 Code generation for unaligned packed loads of double precision data
19731 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
19732 if (x86_sse_unaligned_move_optimal)
19733 movupd mem, reg
19734
19735 if (x86_sse_split_regs == true)
19736 {
19737 movlpd mem, reg
19738 movhpd mem+8, reg
19739 }
19740 else
19741 {
19742 movsd mem, reg
19743 movhpd mem+8, reg
19744 }
19745 */
19746
19747 void
19748 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
19749 {
19750 rtx op0, op1, m;
19751
19752 op0 = operands[0];
19753 op1 = operands[1];
19754
19755 /* Use unaligned load/store for AVX512 or when optimizing for size. */
19756 if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
19757 {
19758 emit_insn (gen_rtx_SET (op0, op1));
19759 return;
19760 }
19761
19762 if (TARGET_AVX)
19763 {
19764 if (GET_MODE_SIZE (mode) == 32)
19765 ix86_avx256_split_vector_move_misalign (op0, op1);
19766 else
19767 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
19768 emit_insn (gen_rtx_SET (op0, op1));
19769 return;
19770 }
19771
19772 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19773 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
19774 {
19775 emit_insn (gen_rtx_SET (op0, op1));
19776 return;
19777 }
19778
19779 /* ??? If we have typed data, then it would appear that using
19780 movdqu is the only way to get unaligned data loaded with
19781 integer type. */
19782 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19783 {
19784 emit_insn (gen_rtx_SET (op0, op1));
19785 return;
19786 }
19787
19788 if (MEM_P (op1))
19789 {
19790 if (TARGET_SSE2 && mode == V2DFmode)
19791 {
19792 rtx zero;
19793
19794 /* When SSE registers are split into halves, we can avoid
19795 writing to the top half twice. */
19796 if (TARGET_SSE_SPLIT_REGS)
19797 {
19798 emit_clobber (op0);
19799 zero = op0;
19800 }
19801 else
19802 {
19803 /* ??? Not sure about the best option for the Intel chips.
19804 The following would seem to satisfy; the register is
19805 entirely cleared, breaking the dependency chain. We
19806 then store to the upper half, with a dependency depth
19807 of one. A rumor has it that Intel recommends two movsd
19808 followed by an unpacklpd, but this is unconfirmed. And
19809 given that the dependency depth of the unpacklpd would
19810 still be one, I'm not sure why this would be better. */
19811 zero = CONST0_RTX (V2DFmode);
19812 }
19813
19814 m = adjust_address (op1, DFmode, 0);
19815 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19816 m = adjust_address (op1, DFmode, 8);
19817 emit_insn (gen_sse2_loadhpd (op0, op0, m));
19818 }
19819 else
19820 {
19821 rtx t;
19822
19823 if (mode != V4SFmode)
19824 t = gen_reg_rtx (V4SFmode);
19825 else
19826 t = op0;
19827
19828 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19829 emit_move_insn (t, CONST0_RTX (V4SFmode));
19830 else
19831 emit_clobber (t);
19832
19833 m = adjust_address (op1, V2SFmode, 0);
19834 emit_insn (gen_sse_loadlps (t, t, m));
19835 m = adjust_address (op1, V2SFmode, 8);
19836 emit_insn (gen_sse_loadhps (t, t, m));
19837 if (mode != V4SFmode)
19838 emit_move_insn (op0, gen_lowpart (mode, t));
19839 }
19840 }
19841 else if (MEM_P (op0))
19842 {
19843 if (TARGET_SSE2 && mode == V2DFmode)
19844 {
19845 m = adjust_address (op0, DFmode, 0);
19846 emit_insn (gen_sse2_storelpd (m, op1));
19847 m = adjust_address (op0, DFmode, 8);
19848 emit_insn (gen_sse2_storehpd (m, op1));
19849 }
19850 else
19851 {
19852 if (mode != V4SFmode)
19853 op1 = gen_lowpart (V4SFmode, op1);
19854
19855 m = adjust_address (op0, V2SFmode, 0);
19856 emit_insn (gen_sse_storelps (m, op1));
19857 m = adjust_address (op0, V2SFmode, 8);
19858 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
19859 }
19860 }
19861 else
19862 gcc_unreachable ();
19863 }
19864
19865 /* Helper function of ix86_fixup_binary_operands to canonicalize
19866 operand order. Returns true if the operands should be swapped. */
19867
19868 static bool
19869 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19870 rtx operands[])
19871 {
19872 rtx dst = operands[0];
19873 rtx src1 = operands[1];
19874 rtx src2 = operands[2];
19875
19876 /* If the operation is not commutative, we can't do anything. */
19877 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH
19878 && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
19879 return false;
19880
19881 /* Highest priority is that src1 should match dst. */
19882 if (rtx_equal_p (dst, src1))
19883 return false;
19884 if (rtx_equal_p (dst, src2))
19885 return true;
19886
19887 /* Next highest priority is that immediate constants come second. */
19888 if (immediate_operand (src2, mode))
19889 return false;
19890 if (immediate_operand (src1, mode))
19891 return true;
19892
19893 /* Lowest priority is that memory references should come second. */
19894 if (MEM_P (src2))
19895 return false;
19896 if (MEM_P (src1))
19897 return true;
19898
19899 return false;
19900 }
19901
19902
19903 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19904 destination to use for the operation. If different from the true
19905 destination in operands[0], a copy operation will be required. */
19906
19907 rtx
19908 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19909 rtx operands[])
19910 {
19911 rtx dst = operands[0];
19912 rtx src1 = operands[1];
19913 rtx src2 = operands[2];
19914
19915 /* Canonicalize operand order. */
19916 if (ix86_swap_binary_operands_p (code, mode, operands))
19917 {
19918 /* It is invalid to swap operands of different modes. */
19919 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19920
19921 std::swap (src1, src2);
19922 }
19923
19924 /* Both source operands cannot be in memory. */
19925 if (MEM_P (src1) && MEM_P (src2))
19926 {
19927 /* Optimization: Only read from memory once. */
19928 if (rtx_equal_p (src1, src2))
19929 {
19930 src2 = force_reg (mode, src2);
19931 src1 = src2;
19932 }
19933 else if (rtx_equal_p (dst, src1))
19934 src2 = force_reg (mode, src2);
19935 else
19936 src1 = force_reg (mode, src1);
19937 }
19938
19939 /* If the destination is memory, and we do not have matching source
19940 operands, do things in registers. */
19941 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19942 dst = gen_reg_rtx (mode);
19943
19944 /* Source 1 cannot be a constant. */
19945 if (CONSTANT_P (src1))
19946 src1 = force_reg (mode, src1);
19947
19948 /* Source 1 cannot be a non-matching memory. */
19949 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19950 src1 = force_reg (mode, src1);
19951
19952 /* Improve address combine. */
19953 if (code == PLUS
19954 && GET_MODE_CLASS (mode) == MODE_INT
19955 && MEM_P (src2))
19956 src2 = force_reg (mode, src2);
19957
19958 operands[1] = src1;
19959 operands[2] = src2;
19960 return dst;
19961 }
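/* Illustrative fix-ups performed above: for (plus:SI (mem:SI A) (mem:SI B))
   one memory operand is forced into a register so the insn keeps at most
   one memory reference, and for integer PLUS a memory src2 is always
   loaded into a register to give the address-combine pass more freedom.  */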
19962
19963 /* Similarly, but assume that the destination has already been
19964 set up properly. */
19965
19966 void
19967 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19968 machine_mode mode, rtx operands[])
19969 {
19970 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19971 gcc_assert (dst == operands[0]);
19972 }
19973
19974 /* Attempt to expand a binary operator. Make the expansion closer to the
19975 actual machine than just general_operand, which would allow 3 separate
19976 memory references (one output, two input) in a single insn. */
19977
19978 void
19979 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19980 rtx operands[])
19981 {
19982 rtx src1, src2, dst, op, clob;
19983
19984 dst = ix86_fixup_binary_operands (code, mode, operands);
19985 src1 = operands[1];
19986 src2 = operands[2];
19987
19988 /* Emit the instruction. */
19989
19990 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19991
19992 if (reload_completed
19993 && code == PLUS
19994 && !rtx_equal_p (dst, src1))
19995 {
19996 /* This is going to be an LEA; avoid splitting it later. */
19997 emit_insn (op);
19998 }
19999 else
20000 {
20001 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20002 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20003 }
20004
20005 /* Fix up the destination if needed. */
20006 if (dst != operands[0])
20007 emit_move_insn (operands[0], dst);
20008 }
20009
20010 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
20011 the given OPERANDS. */
20012
20013 void
20014 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
20015 rtx operands[])
20016 {
20017 rtx op1 = NULL_RTX, op2 = NULL_RTX;
20018 if (SUBREG_P (operands[1]))
20019 {
20020 op1 = operands[1];
20021 op2 = operands[2];
20022 }
20023 else if (SUBREG_P (operands[2]))
20024 {
20025 op1 = operands[2];
20026 op2 = operands[1];
20027 }
20028 /* Optimize (__m128i) d | (__m128i) e and similar code
20029 when d and e are float vectors into a float vector logical
20030 insn. In C/C++ without using intrinsics there is no other way
20031 to express a vector logical operation on float vectors than
20032 to cast them temporarily to integer vectors. */
20033 if (op1
20034 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
20035 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
20036 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
20037 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
20038 && SUBREG_BYTE (op1) == 0
20039 && (GET_CODE (op2) == CONST_VECTOR
20040 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
20041 && SUBREG_BYTE (op2) == 0))
20042 && can_create_pseudo_p ())
20043 {
20044 rtx dst;
20045 switch (GET_MODE (SUBREG_REG (op1)))
20046 {
20047 case E_V4SFmode:
20048 case E_V8SFmode:
20049 case E_V16SFmode:
20050 case E_V2DFmode:
20051 case E_V4DFmode:
20052 case E_V8DFmode:
20053 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
20054 if (GET_CODE (op2) == CONST_VECTOR)
20055 {
20056 op2 = gen_lowpart (GET_MODE (dst), op2);
20057 op2 = force_reg (GET_MODE (dst), op2);
20058 }
20059 else
20060 {
20061 op1 = operands[1];
20062 op2 = SUBREG_REG (operands[2]);
20063 if (!vector_operand (op2, GET_MODE (dst)))
20064 op2 = force_reg (GET_MODE (dst), op2);
20065 }
20066 op1 = SUBREG_REG (op1);
20067 if (!vector_operand (op1, GET_MODE (dst)))
20068 op1 = force_reg (GET_MODE (dst), op1);
20069 emit_insn (gen_rtx_SET (dst,
20070 gen_rtx_fmt_ee (code, GET_MODE (dst),
20071 op1, op2)));
20072 emit_move_insn (operands[0], gen_lowpart (mode, dst));
20073 return;
20074 default:
20075 break;
20076 }
20077 }
20078 if (!vector_operand (operands[1], mode))
20079 operands[1] = force_reg (mode, operands[1]);
20080 if (!vector_operand (operands[2], mode))
20081 operands[2] = force_reg (mode, operands[2]);
20082 ix86_fixup_binary_operands_no_copy (code, mode, operands);
20083 emit_insn (gen_rtx_SET (operands[0],
20084 gen_rtx_fmt_ee (code, mode, operands[1],
20085 operands[2])));
20086 }
20087
20088 /* Return TRUE or FALSE depending on whether the binary operator meets the
20089 appropriate constraints. */
20090
20091 bool
20092 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
20093 rtx operands[3])
20094 {
20095 rtx dst = operands[0];
20096 rtx src1 = operands[1];
20097 rtx src2 = operands[2];
20098
20099 /* Both source operands cannot be in memory. */
20100 if (MEM_P (src1) && MEM_P (src2))
20101 return false;
20102
20103 /* Canonicalize operand order for commutative operators. */
20104 if (ix86_swap_binary_operands_p (code, mode, operands))
20105 std::swap (src1, src2);
20106
20107 /* If the destination is memory, we must have a matching source operand. */
20108 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20109 return false;
20110
20111 /* Source 1 cannot be a constant. */
20112 if (CONSTANT_P (src1))
20113 return false;
20114
20115 /* Source 1 cannot be a non-matching memory. */
20116 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20117 /* Support "andhi/andsi/anddi" as a zero-extending move. */
20118 return (code == AND
20119 && (mode == HImode
20120 || mode == SImode
20121 || (TARGET_64BIT && mode == DImode))
20122 && satisfies_constraint_L (src2));
20123
20124 return true;
20125 }
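
/* For illustration (an assumed example, not an exhaustive list; the exact
   insn chosen is up to the matching patterns): the zero-extending special
   case above accepts e.g.

     (set (reg:SI r) (and:SI (mem:SI addr) (const_int 0xffff)))

   even though the memory source does not match the destination, because
   such an AND can be emitted as a zero-extending load (movzwl) instead of
   a read-modify-write AND.  */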
20126
20127 /* Attempt to expand a unary operator. Make the expansion closer to the
20128 actual machine than just general_operand, which would allow 2 separate
20129 memory references (one output, one input) in a single insn. */
20130
20131 void
20132 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
20133 rtx operands[])
20134 {
20135 bool matching_memory = false;
20136 rtx src, dst, op, clob;
20137
20138 dst = operands[0];
20139 src = operands[1];
20140
20141 /* If the destination is memory, and we do not have matching source
20142 operands, do things in registers. */
20143 if (MEM_P (dst))
20144 {
20145 if (rtx_equal_p (dst, src))
20146 matching_memory = true;
20147 else
20148 dst = gen_reg_rtx (mode);
20149 }
20150
20151 /* When source operand is memory, destination must match. */
20152 if (MEM_P (src) && !matching_memory)
20153 src = force_reg (mode, src);
20154
20155 /* Emit the instruction. */
20156
20157 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
20158
20159 if (code == NOT)
20160 emit_insn (op);
20161 else
20162 {
20163 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20164 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20165 }
20166
20167 /* Fix up the destination if needed. */
20168 if (dst != operands[0])
20169 emit_move_insn (operands[0], dst);
20170 }
20171
20172 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
20173 divisor are within the range [0-255]. */
20174
20175 void
20176 ix86_split_idivmod (machine_mode mode, rtx operands[],
20177 bool signed_p)
20178 {
20179 rtx_code_label *end_label, *qimode_label;
20180 rtx div, mod;
20181 rtx_insn *insn;
20182 rtx scratch, tmp0, tmp1, tmp2;
20183 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
20184 rtx (*gen_zero_extend) (rtx, rtx);
20185 rtx (*gen_test_ccno_1) (rtx, rtx);
20186
20187 switch (mode)
20188 {
20189 case E_SImode:
20190 if (GET_MODE (operands[0]) == SImode)
20191 {
20192 if (GET_MODE (operands[1]) == SImode)
20193 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
20194 else
20195 gen_divmod4_1
20196 = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
20197 gen_zero_extend = gen_zero_extendqisi2;
20198 }
20199 else
20200 {
20201 gen_divmod4_1
20202 = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
20203 gen_zero_extend = gen_zero_extendqidi2;
20204 }
20205 gen_test_ccno_1 = gen_testsi_ccno_1;
20206 break;
20207 case E_DImode:
20208 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
20209 gen_test_ccno_1 = gen_testdi_ccno_1;
20210 gen_zero_extend = gen_zero_extendqidi2;
20211 break;
20212 default:
20213 gcc_unreachable ();
20214 }
20215
20216 end_label = gen_label_rtx ();
20217 qimode_label = gen_label_rtx ();
20218
20219 scratch = gen_reg_rtx (mode);
20220
20221 /* Use 8bit unsigned divmod if dividend and divisor are within
20222 the range [0-255]. */
20223 emit_move_insn (scratch, operands[2]);
20224 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
20225 scratch, 1, OPTAB_DIRECT);
20226 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
20227 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
20228 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
20229 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
20230 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
20231 pc_rtx);
20232 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
20233 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20234 JUMP_LABEL (insn) = qimode_label;
20235
20236 /* Generate the original signed/unsigned divmod. */
20237 div = gen_divmod4_1 (operands[0], operands[1],
20238 operands[2], operands[3]);
20239 emit_insn (div);
20240
20241 /* Branch to the end. */
20242 emit_jump_insn (gen_jump (end_label));
20243 emit_barrier ();
20244
20245 /* Generate 8bit unsigned divide. */
20246 emit_label (qimode_label);
20247 /* Don't use operands[0] for result of 8bit divide since not all
20248 registers support QImode ZERO_EXTRACT. */
20249 tmp0 = lowpart_subreg (HImode, scratch, mode);
20250 tmp1 = lowpart_subreg (HImode, operands[2], mode);
20251 tmp2 = lowpart_subreg (QImode, operands[3], mode);
20252 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
20253
20254 if (signed_p)
20255 {
20256 div = gen_rtx_DIV (mode, operands[2], operands[3]);
20257 mod = gen_rtx_MOD (mode, operands[2], operands[3]);
20258 }
20259 else
20260 {
20261 div = gen_rtx_UDIV (mode, operands[2], operands[3]);
20262 mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
20263 }
20264 if (mode == SImode)
20265 {
20266 if (GET_MODE (operands[0]) != SImode)
20267 div = gen_rtx_ZERO_EXTEND (DImode, div);
20268 if (GET_MODE (operands[1]) != SImode)
20269 mod = gen_rtx_ZERO_EXTEND (DImode, mod);
20270 }
20271
20272 /* Extract remainder from AH. */
20273 tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
20274 tmp0, GEN_INT (8), GEN_INT (8));
20275 if (REG_P (operands[1]))
20276 insn = emit_move_insn (operands[1], tmp1);
20277 else
20278 {
20279 /* Need a new scratch register since the old one has result
20280 of 8bit divide. */
20281 scratch = gen_reg_rtx (GET_MODE (operands[1]));
20282 emit_move_insn (scratch, tmp1);
20283 insn = emit_move_insn (operands[1], scratch);
20284 }
20285 set_unique_reg_note (insn, REG_EQUAL, mod);
20286
20287 /* Zero extend quotient from AL. */
20288 tmp1 = gen_lowpart (QImode, tmp0);
20289 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
20290 set_unique_reg_note (insn, REG_EQUAL, div);
20291
20292 emit_label (end_label);
20293 }
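
/* Illustrative sketch only (not used by the compiler): for the unsigned
   SImode case the split above behaves like the following scalar C, where
   the cheap 8-bit DIV path is taken whenever both operands fit in
   [0, 255]:

     unsigned int q, r;
     if (((a | b) & ~0xffU) == 0)
       {
         q = (unsigned char) a / (unsigned char) b;
         r = (unsigned char) a % (unsigned char) b;
       }
     else
       {
         q = a / b;
         r = a % b;
       }

   The same 8-bit path is valid for the signed variants because values in
   [0, 255] divide identically whether treated as signed or unsigned.  */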
20294
20295 #define LEA_MAX_STALL (3)
20296 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
20297
20298 /* Increase given DISTANCE in half-cycles according to
20299 dependencies between PREV and NEXT instructions.
20300 Add 1 half-cycle if there is no dependency and
20301 go to the next cycle if there is some dependency. */
20302
20303 static unsigned int
20304 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
20305 {
20306 df_ref def, use;
20307
20308 if (!prev || !next)
20309 return distance + (distance & 1) + 2;
20310
20311 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
20312 return distance + 1;
20313
20314 FOR_EACH_INSN_USE (use, next)
20315 FOR_EACH_INSN_DEF (def, prev)
20316 if (!DF_REF_IS_ARTIFICIAL (def)
20317 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
20318 return distance + (distance & 1) + 2;
20319
20320 return distance + 1;
20321 }
20322
20323 /* Function checks if instruction INSN defines register number
20324 REGNO1 or REGNO2. */
20325
20326 static bool
20327 insn_defines_reg (unsigned int regno1, unsigned int regno2,
20328 rtx_insn *insn)
20329 {
20330 df_ref def;
20331
20332 FOR_EACH_INSN_DEF (def, insn)
20333 if (DF_REF_REG_DEF_P (def)
20334 && !DF_REF_IS_ARTIFICIAL (def)
20335 && (regno1 == DF_REF_REGNO (def)
20336 || regno2 == DF_REF_REGNO (def)))
20337 return true;
20338
20339 return false;
20340 }
20341
20342 /* Function checks if instruction INSN uses register number
20343 REGNO as part of an address expression. */
20344
20345 static bool
20346 insn_uses_reg_mem (unsigned int regno, rtx insn)
20347 {
20348 df_ref use;
20349
20350 FOR_EACH_INSN_USE (use, insn)
20351 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
20352 return true;
20353
20354 return false;
20355 }
20356
20357 /* Search backward for non-agu definition of register number REGNO1
20358 or register number REGNO2 in basic block starting from instruction
20359 START up to head of basic block or instruction INSN.
20360
20361 Put true into *FOUND if a definition was found
20362 and false otherwise.
20363
20364 Distance in half-cycles between START and found instruction or head
20365 of BB is added to DISTANCE and returned. */
20366
20367 static int
20368 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
20369 rtx_insn *insn, int distance,
20370 rtx_insn *start, bool *found)
20371 {
20372 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
20373 rtx_insn *prev = start;
20374 rtx_insn *next = NULL;
20375
20376 *found = false;
20377
20378 while (prev
20379 && prev != insn
20380 && distance < LEA_SEARCH_THRESHOLD)
20381 {
20382 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
20383 {
20384 distance = increase_distance (prev, next, distance);
20385 if (insn_defines_reg (regno1, regno2, prev))
20386 {
20387 if (recog_memoized (prev) < 0
20388 || get_attr_type (prev) != TYPE_LEA)
20389 {
20390 *found = true;
20391 return distance;
20392 }
20393 }
20394
20395 next = prev;
20396 }
20397 if (prev == BB_HEAD (bb))
20398 break;
20399
20400 prev = PREV_INSN (prev);
20401 }
20402
20403 return distance;
20404 }
20405
20406 /* Search backward for non-agu definition of register number REGNO1
20407 or register number REGNO2 in INSN's basic block until
20408 1. Pass LEA_SEARCH_THRESHOLD instructions, or
20409 2. Reach neighbor BBs boundary, or
20410 3. Reach agu definition.
20411 Returns the distance between the non-agu definition point and INSN.
20412 If no definition point, returns -1. */
20413
20414 static int
20415 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
20416 rtx_insn *insn)
20417 {
20418 basic_block bb = BLOCK_FOR_INSN (insn);
20419 int distance = 0;
20420 bool found = false;
20421
20422 if (insn != BB_HEAD (bb))
20423 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
20424 distance, PREV_INSN (insn),
20425 &found);
20426
20427 if (!found && distance < LEA_SEARCH_THRESHOLD)
20428 {
20429 edge e;
20430 edge_iterator ei;
20431 bool simple_loop = false;
20432
20433 FOR_EACH_EDGE (e, ei, bb->preds)
20434 if (e->src == bb)
20435 {
20436 simple_loop = true;
20437 break;
20438 }
20439
20440 if (simple_loop)
20441 distance = distance_non_agu_define_in_bb (regno1, regno2,
20442 insn, distance,
20443 BB_END (bb), &found);
20444 else
20445 {
20446 int shortest_dist = -1;
20447 bool found_in_bb = false;
20448
20449 FOR_EACH_EDGE (e, ei, bb->preds)
20450 {
20451 int bb_dist
20452 = distance_non_agu_define_in_bb (regno1, regno2,
20453 insn, distance,
20454 BB_END (e->src),
20455 &found_in_bb);
20456 if (found_in_bb)
20457 {
20458 if (shortest_dist < 0)
20459 shortest_dist = bb_dist;
20460 else if (bb_dist > 0)
20461 shortest_dist = MIN (bb_dist, shortest_dist);
20462
20463 found = true;
20464 }
20465 }
20466
20467 distance = shortest_dist;
20468 }
20469 }
20470
20471 /* get_attr_type may modify recog data. We want to make sure
20472 that recog data is valid for instruction INSN, on which
20473 distance_non_agu_define is called. INSN is unchanged here. */
20474 extract_insn_cached (insn);
20475
20476 if (!found)
20477 return -1;
20478
20479 return distance >> 1;
20480 }
20481
20482 /* Return the distance in half-cycles between INSN and the next
20483 insn that uses register number REGNO in a memory address, added
20484 to DISTANCE. Return -1 if REGNO is set.
20485
20486 Put true value into *FOUND if register usage was found and
20487 false otherwise.
20488 Put true value into *REDEFINED if register redefinition was
20489 found and false otherwise. */
20490
20491 static int
20492 distance_agu_use_in_bb (unsigned int regno,
20493 rtx_insn *insn, int distance, rtx_insn *start,
20494 bool *found, bool *redefined)
20495 {
20496 basic_block bb = NULL;
20497 rtx_insn *next = start;
20498 rtx_insn *prev = NULL;
20499
20500 *found = false;
20501 *redefined = false;
20502
20503 if (start != NULL_RTX)
20504 {
20505 bb = BLOCK_FOR_INSN (start);
20506 if (start != BB_HEAD (bb))
20507 /* If insn and start belong to the same bb, set prev to insn,
20508 so the call to increase_distance will increase the distance
20509 between insns by 1. */
20510 prev = insn;
20511 }
20512
20513 while (next
20514 && next != insn
20515 && distance < LEA_SEARCH_THRESHOLD)
20516 {
20517 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
20518 {
20519 distance = increase_distance (prev, next, distance);
20520 if (insn_uses_reg_mem (regno, next))
20521 {
20522 /* Return DISTANCE if OP0 is used in memory
20523 address in NEXT. */
20524 *found = true;
20525 return distance;
20526 }
20527
20528 if (insn_defines_reg (regno, INVALID_REGNUM, next))
20529 {
20530 /* Return -1 if OP0 is set in NEXT. */
20531 *redefined = true;
20532 return -1;
20533 }
20534
20535 prev = next;
20536 }
20537
20538 if (next == BB_END (bb))
20539 break;
20540
20541 next = NEXT_INSN (next);
20542 }
20543
20544 return distance;
20545 }
20546
20547 /* Return the distance between INSN and the next insn that uses
20548 register number REGNO0 in memory address. Return -1 if no such
20549 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
20550
20551 static int
20552 distance_agu_use (unsigned int regno0, rtx_insn *insn)
20553 {
20554 basic_block bb = BLOCK_FOR_INSN (insn);
20555 int distance = 0;
20556 bool found = false;
20557 bool redefined = false;
20558
20559 if (insn != BB_END (bb))
20560 distance = distance_agu_use_in_bb (regno0, insn, distance,
20561 NEXT_INSN (insn),
20562 &found, &redefined);
20563
20564 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
20565 {
20566 edge e;
20567 edge_iterator ei;
20568 bool simple_loop = false;
20569
20570 FOR_EACH_EDGE (e, ei, bb->succs)
20571 if (e->dest == bb)
20572 {
20573 simple_loop = true;
20574 break;
20575 }
20576
20577 if (simple_loop)
20578 distance = distance_agu_use_in_bb (regno0, insn,
20579 distance, BB_HEAD (bb),
20580 &found, &redefined);
20581 else
20582 {
20583 int shortest_dist = -1;
20584 bool found_in_bb = false;
20585 bool redefined_in_bb = false;
20586
20587 FOR_EACH_EDGE (e, ei, bb->succs)
20588 {
20589 int bb_dist
20590 = distance_agu_use_in_bb (regno0, insn,
20591 distance, BB_HEAD (e->dest),
20592 &found_in_bb, &redefined_in_bb);
20593 if (found_in_bb)
20594 {
20595 if (shortest_dist < 0)
20596 shortest_dist = bb_dist;
20597 else if (bb_dist > 0)
20598 shortest_dist = MIN (bb_dist, shortest_dist);
20599
20600 found = true;
20601 }
20602 }
20603
20604 distance = shortest_dist;
20605 }
20606 }
20607
20608 if (!found || redefined)
20609 return -1;
20610
20611 return distance >> 1;
20612 }
20613
20614 /* Define this macro to tune LEA priority vs ADD; it takes effect when
20615 there is a dilemma of choosing LEA or ADD.
20616 Negative value: ADD is more preferred than LEA.
20617 Zero: Neutral.
20618 Positive value: LEA is more preferred than ADD. */
20619 #define IX86_LEA_PRIORITY 0
20620
20621 /* Return true if usage of lea INSN has performance advantage
20622 over a sequence of instructions. Instructions sequence has
20623 SPLIT_COST cycles higher latency than lea latency. */
20624
20625 static bool
20626 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
20627 unsigned int regno2, int split_cost, bool has_scale)
20628 {
20629 int dist_define, dist_use;
20630
20631 /* For Silvermont, if a 2-source or 3-source LEA is used for a
20632 non-destructive destination, or the ability to use SCALE is
20633 wanted, the use of LEA is justified. */
20634 if (TARGET_SILVERMONT || TARGET_INTEL)
20635 {
20636 if (has_scale)
20637 return true;
20638 if (split_cost < 1)
20639 return false;
20640 if (regno0 == regno1 || regno0 == regno2)
20641 return false;
20642 return true;
20643 }
20644
20645 dist_define = distance_non_agu_define (regno1, regno2, insn);
20646 dist_use = distance_agu_use (regno0, insn);
20647
20648 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
20649 {
20650 /* If there is no non-AGU operand definition, no AGU
20651 operand usage and the split cost is 0, then both the lea
20652 and non-lea variants have the same priority. Currently
20653 we prefer lea for 64-bit code and non-lea for 32-bit
20654 code. */
20655 if (dist_use < 0 && split_cost == 0)
20656 return TARGET_64BIT || IX86_LEA_PRIORITY;
20657 else
20658 return true;
20659 }
20660
20661 /* With a longer definition distance, lea is preferable.
20662 Here we adjust the distance to take the splitting cost and
20663 lea priority into account. */
20664 dist_define += split_cost + IX86_LEA_PRIORITY;
20665
20666 /* If there is no use in a memory address then we just check
20667 that the split cost exceeds the AGU stall. */
20668 if (dist_use < 0)
20669 return dist_define > LEA_MAX_STALL;
20670
20671 /* If this insn has both backward non-agu dependence and forward
20672 agu dependence, the one with the shorter distance takes effect. */
20673 return dist_define >= dist_use;
20674 }
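
/* A rough worked example (numbers assumed, not taken from any particular
   cost table): with IX86_LEA_PRIORITY == 0, suppose an input register of
   the lea was defined by a non-AGU instruction 2 cycles earlier
   (dist_define == 2), the split would add 1 cycle (split_cost == 1), and
   the lea result feeds a memory address 2 cycles later (dist_use == 2).
   Then dist_define becomes 2 + 1 + 0 == 3 >= dist_use, so the lea is
   kept; had the address use been 4 cycles away instead, 3 < 4 and the lea
   would be split.  */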
20675
20676 /* Return true if it is legal to clobber flags by INSN and
20677 false otherwise. */
20678
20679 static bool
20680 ix86_ok_to_clobber_flags (rtx_insn *insn)
20681 {
20682 basic_block bb = BLOCK_FOR_INSN (insn);
20683 df_ref use;
20684 bitmap live;
20685
20686 while (insn)
20687 {
20688 if (NONDEBUG_INSN_P (insn))
20689 {
20690 FOR_EACH_INSN_USE (use, insn)
20691 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
20692 return false;
20693
20694 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
20695 return true;
20696 }
20697
20698 if (insn == BB_END (bb))
20699 break;
20700
20701 insn = NEXT_INSN (insn);
20702 }
20703
20704 live = df_get_live_out (bb);
20705 return !REGNO_REG_SET_P (live, FLAGS_REG);
20706 }
20707
20708 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20709 move and add to avoid AGU stalls. */
20710
20711 bool
20712 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20713 {
20714 unsigned int regno0, regno1, regno2;
20715
20716 /* Check if we need to optimize. */
20717 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20718 return false;
20719
20720 /* Check it is correct to split here. */
20721 if (!ix86_ok_to_clobber_flags (insn))
20722 return false;
20723
20724 regno0 = true_regnum (operands[0]);
20725 regno1 = true_regnum (operands[1]);
20726 regno2 = true_regnum (operands[2]);
20727
20728 /* We need to split only adds with a non-destructive
20729 destination operand. */
20730 if (regno0 == regno1 || regno0 == regno2)
20731 return false;
20732 else
20733 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20734 }
20735
20736 /* Return true if we should emit lea instruction instead of mov
20737 instruction. */
20738
20739 bool
20740 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20741 {
20742 unsigned int regno0, regno1;
20743
20744 /* Check if we need to optimize. */
20745 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20746 return false;
20747
20748 /* Use lea for reg to reg moves only. */
20749 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20750 return false;
20751
20752 regno0 = true_regnum (operands[0]);
20753 regno1 = true_regnum (operands[1]);
20754
20755 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20756 }
20757
20758 /* Return true if we need to split lea into a sequence of
20759 instructions to avoid AGU stalls. */
20760
20761 bool
20762 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20763 {
20764 unsigned int regno0, regno1, regno2;
20765 int split_cost;
20766 struct ix86_address parts;
20767 int ok;
20768
20769 /* Check we need to optimize. */
20770 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20771 return false;
20772
20773 /* The "at least two components" test below might not catch simple
20774 move or zero extension insns if parts.base is non-NULL and parts.disp
20775 is const0_rtx as the only components in the address, e.g. if the
20776 register is %rbp or %r13. As this test is much cheaper and moves or
20777 zero extensions are the common case, do this check first. */
20778 if (REG_P (operands[1])
20779 || (SImode_address_operand (operands[1], VOIDmode)
20780 && REG_P (XEXP (operands[1], 0))))
20781 return false;
20782
20783 /* Check if it is OK to split here. */
20784 if (!ix86_ok_to_clobber_flags (insn))
20785 return false;
20786
20787 ok = ix86_decompose_address (operands[1], &parts);
20788 gcc_assert (ok);
20789
20790 /* There should be at least two components in the address. */
20791 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20792 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20793 return false;
20794
20795 /* We should not split into add if a non-legitimate PIC
20796 operand is used as the displacement. */
20797 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20798 return false;
20799
20800 regno0 = true_regnum (operands[0]);
20801 regno1 = INVALID_REGNUM;
20802 regno2 = INVALID_REGNUM;
20803
20804 if (parts.base)
20805 regno1 = true_regnum (parts.base);
20806 if (parts.index)
20807 regno2 = true_regnum (parts.index);
20808
20809 split_cost = 0;
20810
20811 /* Compute how many cycles we will add to execution time
20812 if we split the lea into a sequence of instructions. */
20813 if (parts.base || parts.index)
20814 {
20815 /* Have to use a mov instruction if the non-destructive
20816 destination form is used. */
20817 if (regno1 != regno0 && regno2 != regno0)
20818 split_cost += 1;
20819
20820 /* Have to add index to base if both exist. */
20821 if (parts.base && parts.index)
20822 split_cost += 1;
20823
20824 /* Have to use shift and adds if scale is 2 or greater. */
20825 if (parts.scale > 1)
20826 {
20827 if (regno0 != regno1)
20828 split_cost += 1;
20829 else if (regno2 == regno0)
20830 split_cost += 4;
20831 else
20832 split_cost += parts.scale;
20833 }
20834
20835 /* Have to use an add instruction with an immediate if
20836 disp is nonzero. */
20837 if (parts.disp && parts.disp != const0_rtx)
20838 split_cost += 1;
20839
20840 /* Subtract the price of lea. */
20841 split_cost -= 1;
20842 }
20843
20844 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20845 parts.scale > 1);
20846 }
20847
20848 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20849 matches destination. RTX includes clobber of FLAGS_REG. */
20850
20851 static void
20852 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20853 rtx dst, rtx src)
20854 {
20855 rtx op, clob;
20856
20857 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20858 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20859
20860 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20861 }
20862
20863 /* Return true if regno1 def is nearest to the insn. */
20864
20865 static bool
20866 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20867 {
20868 rtx_insn *prev = insn;
20869 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20870
20871 if (insn == start)
20872 return false;
20873 while (prev && prev != start)
20874 {
20875 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20876 {
20877 prev = PREV_INSN (prev);
20878 continue;
20879 }
20880 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20881 return true;
20882 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20883 return false;
20884 prev = PREV_INSN (prev);
20885 }
20886
20887 /* None of the regs is defined in the bb. */
20888 return false;
20889 }
20890
20891 /* Split lea instructions into a sequence of instructions
20892 which are executed on ALU to avoid AGU stalls.
20893 It is assumed that it is allowed to clobber flags register
20894 at lea position. */
20895
20896 void
20897 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20898 {
20899 unsigned int regno0, regno1, regno2;
20900 struct ix86_address parts;
20901 rtx target, tmp;
20902 int ok, adds;
20903
20904 ok = ix86_decompose_address (operands[1], &parts);
20905 gcc_assert (ok);
20906
20907 target = gen_lowpart (mode, operands[0]);
20908
20909 regno0 = true_regnum (target);
20910 regno1 = INVALID_REGNUM;
20911 regno2 = INVALID_REGNUM;
20912
20913 if (parts.base)
20914 {
20915 parts.base = gen_lowpart (mode, parts.base);
20916 regno1 = true_regnum (parts.base);
20917 }
20918
20919 if (parts.index)
20920 {
20921 parts.index = gen_lowpart (mode, parts.index);
20922 regno2 = true_regnum (parts.index);
20923 }
20924
20925 if (parts.disp)
20926 parts.disp = gen_lowpart (mode, parts.disp);
20927
20928 if (parts.scale > 1)
20929 {
20930 /* Case r1 = r1 + ... */
20931 if (regno1 == regno0)
20932 {
20933 /* If we have the case r1 = r1 + C * r2 then we
20934 would need a multiplication, which is very
20935 expensive. Assume the cost model is wrong if we
20936 reach such a case here. */
20937 gcc_assert (regno2 != regno0);
20938
20939 for (adds = parts.scale; adds > 0; adds--)
20940 ix86_emit_binop (PLUS, mode, target, parts.index);
20941 }
20942 else
20943 {
20944 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20945 if (regno0 != regno2)
20946 emit_insn (gen_rtx_SET (target, parts.index));
20947
20948 /* Use shift for scaling. */
20949 ix86_emit_binop (ASHIFT, mode, target,
20950 GEN_INT (exact_log2 (parts.scale)));
20951
20952 if (parts.base)
20953 ix86_emit_binop (PLUS, mode, target, parts.base);
20954
20955 if (parts.disp && parts.disp != const0_rtx)
20956 ix86_emit_binop (PLUS, mode, target, parts.disp);
20957 }
20958 }
20959 else if (!parts.base && !parts.index)
20960 {
20961 gcc_assert (parts.disp);
20962 emit_insn (gen_rtx_SET (target, parts.disp));
20963 }
20964 else
20965 {
20966 if (!parts.base)
20967 {
20968 if (regno0 != regno2)
20969 emit_insn (gen_rtx_SET (target, parts.index));
20970 }
20971 else if (!parts.index)
20972 {
20973 if (regno0 != regno1)
20974 emit_insn (gen_rtx_SET (target, parts.base));
20975 }
20976 else
20977 {
20978 if (regno0 == regno1)
20979 tmp = parts.index;
20980 else if (regno0 == regno2)
20981 tmp = parts.base;
20982 else
20983 {
20984 rtx tmp1;
20985
20986 /* Find better operand for SET instruction, depending
20987 on which definition is farther from the insn. */
20988 if (find_nearest_reg_def (insn, regno1, regno2))
20989 tmp = parts.index, tmp1 = parts.base;
20990 else
20991 tmp = parts.base, tmp1 = parts.index;
20992
20993 emit_insn (gen_rtx_SET (target, tmp));
20994
20995 if (parts.disp && parts.disp != const0_rtx)
20996 ix86_emit_binop (PLUS, mode, target, parts.disp);
20997
20998 ix86_emit_binop (PLUS, mode, target, tmp1);
20999 return;
21000 }
21001
21002 ix86_emit_binop (PLUS, mode, target, tmp);
21003 }
21004
21005 if (parts.disp && parts.disp != const0_rtx)
21006 ix86_emit_binop (PLUS, mode, target, parts.disp);
21007 }
21008 }
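
/* For illustration (register names assumed), a three-component address
   such as

     leal 8(%ebx,%ecx,4), %eax

   is replaced by an ALU sequence along the lines of

     movl %ecx, %eax
     sall $2, %eax
     addl %ebx, %eax
     addl $8, %eax

   which is why the callers first verify with ix86_ok_to_clobber_flags
   that trashing the flags register at the lea position is safe.  */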
21009
21010 /* Return true if it is ok to optimize an ADD operation to an LEA
21011 operation to avoid flag register consumption. For most processors,
21012 ADD is faster than LEA. For processors like BONNELL, if the
21013 destination register of the LEA holds an actual address which will be
21014 used soon, LEA is better and otherwise ADD is better. */
21015
21016 bool
21017 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
21018 {
21019 unsigned int regno0 = true_regnum (operands[0]);
21020 unsigned int regno1 = true_regnum (operands[1]);
21021 unsigned int regno2 = true_regnum (operands[2]);
21022
21023 /* If a = b + c, (a!=b && a!=c), must use lea form. */
21024 if (regno0 != regno1 && regno0 != regno2)
21025 return true;
21026
21027 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21028 return false;
21029
21030 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
21031 }
21032
21033 /* Return true if destination reg of SET_BODY is shift count of
21034 USE_BODY. */
21035
21036 static bool
21037 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
21038 {
21039 rtx set_dest;
21040 rtx shift_rtx;
21041 int i;
21042
21043 /* Retrieve destination of SET_BODY. */
21044 switch (GET_CODE (set_body))
21045 {
21046 case SET:
21047 set_dest = SET_DEST (set_body);
21048 if (!set_dest || !REG_P (set_dest))
21049 return false;
21050 break;
21051 case PARALLEL:
21052 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
21053 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
21054 use_body))
21055 return true;
21056 /* FALLTHROUGH */
21057 default:
21058 return false;
21059 }
21060
21061 /* Retrieve shift count of USE_BODY. */
21062 switch (GET_CODE (use_body))
21063 {
21064 case SET:
21065 shift_rtx = XEXP (use_body, 1);
21066 break;
21067 case PARALLEL:
21068 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
21069 if (ix86_dep_by_shift_count_body (set_body,
21070 XVECEXP (use_body, 0, i)))
21071 return true;
21072 /* FALLTHROUGH */
21073 default:
21074 return false;
21075 }
21076
21077 if (shift_rtx
21078 && (GET_CODE (shift_rtx) == ASHIFT
21079 || GET_CODE (shift_rtx) == LSHIFTRT
21080 || GET_CODE (shift_rtx) == ASHIFTRT
21081 || GET_CODE (shift_rtx) == ROTATE
21082 || GET_CODE (shift_rtx) == ROTATERT))
21083 {
21084 rtx shift_count = XEXP (shift_rtx, 1);
21085
21086 /* Return true if shift count is dest of SET_BODY. */
21087 if (REG_P (shift_count))
21088 {
21089 /* Add this check since the function can be invoked before
21090 register allocation by the pre-reload scheduler. */
21091 if (reload_completed
21092 && true_regnum (set_dest) == true_regnum (shift_count))
21093 return true;
21094 else if (REGNO (set_dest) == REGNO (shift_count))
21095 return true;
21096 }
21097 }
21098
21099 return false;
21100 }
21101
21102 /* Return true if destination reg of SET_INSN is shift count of
21103 USE_INSN. */
21104
21105 bool
21106 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
21107 {
21108 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
21109 PATTERN (use_insn));
21110 }
21111
21112 /* Return TRUE or FALSE depending on whether the unary operator meets the
21113 appropriate constraints. */
21114
21115 bool
21116 ix86_unary_operator_ok (enum rtx_code,
21117 machine_mode,
21118 rtx operands[2])
21119 {
21120 /* If one of operands is memory, source and destination must match. */
21121 if ((MEM_P (operands[0])
21122 || MEM_P (operands[1]))
21123 && ! rtx_equal_p (operands[0], operands[1]))
21124 return false;
21125 return true;
21126 }
21127
21128 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
21129 are ok, keeping in mind the possible movddup alternative. */
21130
21131 bool
21132 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
21133 {
21134 if (MEM_P (operands[0]))
21135 return rtx_equal_p (operands[0], operands[1 + high]);
21136 if (MEM_P (operands[1]) && MEM_P (operands[2]))
21137 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
21138 return true;
21139 }
21140
21141 /* Post-reload splitter for converting an SF or DFmode value in an
21142 SSE register into an unsigned SImode. */
21143
21144 void
21145 ix86_split_convert_uns_si_sse (rtx operands[])
21146 {
21147 machine_mode vecmode;
21148 rtx value, large, zero_or_two31, input, two31, x;
21149
21150 large = operands[1];
21151 zero_or_two31 = operands[2];
21152 input = operands[3];
21153 two31 = operands[4];
21154 vecmode = GET_MODE (large);
21155 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
21156
21157 /* Load up the value into the low element. We must ensure that the other
21158 elements are valid floats -- zero is the easiest such value. */
21159 if (MEM_P (input))
21160 {
21161 if (vecmode == V4SFmode)
21162 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
21163 else
21164 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
21165 }
21166 else
21167 {
21168 input = gen_rtx_REG (vecmode, REGNO (input));
21169 emit_move_insn (value, CONST0_RTX (vecmode));
21170 if (vecmode == V4SFmode)
21171 emit_insn (gen_sse_movss (value, value, input));
21172 else
21173 emit_insn (gen_sse2_movsd (value, value, input));
21174 }
21175
21176 emit_move_insn (large, two31);
21177 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
21178
21179 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
21180 emit_insn (gen_rtx_SET (large, x));
21181
21182 x = gen_rtx_AND (vecmode, zero_or_two31, large);
21183 emit_insn (gen_rtx_SET (zero_or_two31, x));
21184
21185 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
21186 emit_insn (gen_rtx_SET (value, x));
21187
21188 large = gen_rtx_REG (V4SImode, REGNO (large));
21189 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
21190
21191 x = gen_rtx_REG (V4SImode, REGNO (value));
21192 if (vecmode == V4SFmode)
21193 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
21194 else
21195 emit_insn (gen_sse2_cvttpd2dq (x, value));
21196 value = x;
21197
21198 emit_insn (gen_xorv4si3 (value, value, large));
21199 }
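
/* Scalar sketch of the trick above (assumes the input is a non-negative
   value that fits in 32 bits; behaviour outside that range follows the
   hardware, not this sketch):

     unsigned int
     df_to_u32 (double x)
     {
       if (x < 2147483648.0)
         return (unsigned int) (int) x;
       return (unsigned int) (int) (x - 2147483648.0) ^ 0x80000000u;
     }

   Values >= 2^31 are shifted below the signed range before the
   truncating signed conversion, and the sign bit is patched back in with
   an XOR.  */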
21200
21201 /* Convert an unsigned DImode value into a DFmode, using only SSE.
21202 Expects the 64-bit DImode to be supplied in a pair of integral
21203 registers. Requires SSE2; will use SSE3 if available. For x86_32,
21204 -mfpmath=sse, !optimize_size only. */
21205
21206 void
21207 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
21208 {
21209 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
21210 rtx int_xmm, fp_xmm;
21211 rtx biases, exponents;
21212 rtx x;
21213
21214 int_xmm = gen_reg_rtx (V4SImode);
21215 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
21216 emit_insn (gen_movdi_to_sse (int_xmm, input));
21217 else if (TARGET_SSE_SPLIT_REGS)
21218 {
21219 emit_clobber (int_xmm);
21220 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
21221 }
21222 else
21223 {
21224 x = gen_reg_rtx (V2DImode);
21225 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
21226 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
21227 }
21228
21229 x = gen_rtx_CONST_VECTOR (V4SImode,
21230 gen_rtvec (4, GEN_INT (0x43300000UL),
21231 GEN_INT (0x45300000UL),
21232 const0_rtx, const0_rtx));
21233 exponents = validize_mem (force_const_mem (V4SImode, x));
21234
21235 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
21236 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
21237
21238 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
21239 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
21240 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
21241 (0x1.0p84 + double(fp_value_hi_xmm)).
21242 Note these exponents differ by 32. */
21243
21244 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
21245
21246 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
21247 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
21248 real_ldexp (&bias_lo_rvt, &dconst1, 52);
21249 real_ldexp (&bias_hi_rvt, &dconst1, 84);
21250 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
21251 x = const_double_from_real_value (bias_hi_rvt, DFmode);
21252 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
21253 biases = validize_mem (force_const_mem (V2DFmode, biases));
21254 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
21255
21256 /* Add the upper and lower DFmode values together. */
21257 if (TARGET_SSE3)
21258 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
21259 else
21260 {
21261 x = copy_to_mode_reg (V2DFmode, fp_xmm);
21262 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
21263 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
21264 }
21265
21266 ix86_expand_vector_extract (false, target, fp_xmm, 0);
21267 }
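
/* A scalar sketch of the exponent-bias trick used above, assuming
   little-endian IEEE-754 doubles (as on x86) and <string.h>:

     double
     u64_to_double (unsigned long long x)
     {
       unsigned long long lo_bits
         = (x & 0xffffffffULL) | 0x4330000000000000ULL;
       unsigned long long hi_bits
         = (x >> 32) | 0x4530000000000000ULL;
       double lo, hi;

       memcpy (&lo, &lo_bits, sizeof lo);
       memcpy (&hi, &hi_bits, sizeof hi);
       return (lo - 0x1.0p52) + (hi - 0x1.0p84);
     }

   The 0x433... pattern is 0x1.0p52 with the low 32 bits of X stuffed
   into the mantissa, and the 0x453... pattern is 0x1.0p84 with the high
   32 bits in the mantissa; subtracting the biases and adding the two
   halves reconstructs X as a double, with a single rounding in the final
   addition.  */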
21268
21269 /* Not used, but eases macroization of patterns. */
21270 void
21271 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
21272 {
21273 gcc_unreachable ();
21274 }
21275
21276 /* Convert an unsigned SImode value into a DFmode. Only currently used
21277 for SSE, but applicable anywhere. */
21278
21279 void
21280 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
21281 {
21282 REAL_VALUE_TYPE TWO31r;
21283 rtx x, fp;
21284
21285 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
21286 NULL, 1, OPTAB_DIRECT);
21287
21288 fp = gen_reg_rtx (DFmode);
21289 emit_insn (gen_floatsidf2 (fp, x));
21290
21291 real_ldexp (&TWO31r, &dconst1, 31);
21292 x = const_double_from_real_value (TWO31r, DFmode);
21293
21294 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
21295 if (x != target)
21296 emit_move_insn (target, x);
21297 }
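
/* Scalar sketch of the expansion above (relies on the two's-complement
   wraparound of the 32-bit addition, as the generated code does):

     double
     u32_to_double (unsigned int u)
     {
       int biased = (int) (u + 0x80000000u);
       return (double) biased + 2147483648.0;
     }

   Adding -2^31 maps [0, 2^32) onto the signed range, the signed
   int->double conversion is exact, and adding 2^31.0 back yields U.  */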
21298
21299 /* Convert a signed DImode value into a DFmode. Only used for SSE in
21300 32-bit mode; otherwise we have a direct convert instruction. */
21301
21302 void
21303 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
21304 {
21305 REAL_VALUE_TYPE TWO32r;
21306 rtx fp_lo, fp_hi, x;
21307
21308 fp_lo = gen_reg_rtx (DFmode);
21309 fp_hi = gen_reg_rtx (DFmode);
21310
21311 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
21312
21313 real_ldexp (&TWO32r, &dconst1, 32);
21314 x = const_double_from_real_value (TWO32r, DFmode);
21315 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
21316
21317 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
21318
21319 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
21320 0, OPTAB_DIRECT);
21321 if (x != target)
21322 emit_move_insn (target, x);
21323 }
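
/* Scalar sketch of the expansion above (assumes an arithmetic right
   shift for the signed X >> 32, as GCC on x86 provides; the final
   rounding happens in the same place as in the emitted sequence):

     double
     s64_to_double (long long x)
     {
       double hi = (double) (int) (x >> 32);
       double lo = (double) (unsigned int) x;
       return hi * 4294967296.0 + lo;
     }

   The high half carries the sign and is scaled by 2^32; the low half is
   converted as an unsigned 32-bit value.  */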
21324
21325 /* Convert an unsigned SImode value into a SFmode, using only SSE.
21326 For x86_32, -mfpmath=sse, !optimize_size only. */
21327 void
21328 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
21329 {
21330 REAL_VALUE_TYPE ONE16r;
21331 rtx fp_hi, fp_lo, int_hi, int_lo, x;
21332
21333 real_ldexp (&ONE16r, &dconst1, 16);
21334 x = const_double_from_real_value (ONE16r, SFmode);
21335 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
21336 NULL, 0, OPTAB_DIRECT);
21337 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
21338 NULL, 0, OPTAB_DIRECT);
21339 fp_hi = gen_reg_rtx (SFmode);
21340 fp_lo = gen_reg_rtx (SFmode);
21341 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
21342 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
21343 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
21344 0, OPTAB_DIRECT);
21345 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
21346 0, OPTAB_DIRECT);
21347 if (!rtx_equal_p (target, fp_hi))
21348 emit_move_insn (target, fp_hi);
21349 }
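
/* Scalar sketch of the 16-bit split used above:

     float
     u32_to_float (unsigned int u)
     {
       float hi = (float) (int) (u >> 16);
       float lo = (float) (int) (u & 0xffff);
       return hi * 65536.0f + lo;
     }

   Both halves fit in 16 bits, so each signed int->float conversion and
   the multiply by 2^16 are exact; only the final addition rounds.  */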
21350
21351 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
21352 a vector of unsigned ints VAL to vector of floats TARGET. */
21353
21354 void
21355 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
21356 {
21357 rtx tmp[8];
21358 REAL_VALUE_TYPE TWO16r;
21359 machine_mode intmode = GET_MODE (val);
21360 machine_mode fltmode = GET_MODE (target);
21361 rtx (*cvt) (rtx, rtx);
21362
21363 if (intmode == V4SImode)
21364 cvt = gen_floatv4siv4sf2;
21365 else
21366 cvt = gen_floatv8siv8sf2;
21367 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
21368 tmp[0] = force_reg (intmode, tmp[0]);
21369 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
21370 OPTAB_DIRECT);
21371 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
21372 NULL_RTX, 1, OPTAB_DIRECT);
21373 tmp[3] = gen_reg_rtx (fltmode);
21374 emit_insn (cvt (tmp[3], tmp[1]));
21375 tmp[4] = gen_reg_rtx (fltmode);
21376 emit_insn (cvt (tmp[4], tmp[2]));
21377 real_ldexp (&TWO16r, &dconst1, 16);
21378 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
21379 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
21380 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
21381 OPTAB_DIRECT);
21382 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
21383 OPTAB_DIRECT);
21384 if (tmp[7] != target)
21385 emit_move_insn (target, tmp[7]);
21386 }
21387
21388 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
21389 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
21390 This is done by doing just signed conversion if < 0x1p31, and otherwise by
21391 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
21392
21393 rtx
21394 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
21395 {
21396 REAL_VALUE_TYPE TWO31r;
21397 rtx two31r, tmp[4];
21398 machine_mode mode = GET_MODE (val);
21399 machine_mode scalarmode = GET_MODE_INNER (mode);
21400 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
21401 rtx (*cmp) (rtx, rtx, rtx, rtx);
21402 int i;
21403
21404 for (i = 0; i < 3; i++)
21405 tmp[i] = gen_reg_rtx (mode);
21406 real_ldexp (&TWO31r, &dconst1, 31);
21407 two31r = const_double_from_real_value (TWO31r, scalarmode);
21408 two31r = ix86_build_const_vector (mode, 1, two31r);
21409 two31r = force_reg (mode, two31r);
21410 switch (mode)
21411 {
21412 case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
21413 case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
21414 case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
21415 case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
21416 default: gcc_unreachable ();
21417 }
21418 tmp[3] = gen_rtx_LE (mode, two31r, val);
21419 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
21420 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
21421 0, OPTAB_DIRECT);
21422 if (intmode == V4SImode || TARGET_AVX2)
21423 *xorp = expand_simple_binop (intmode, ASHIFT,
21424 gen_lowpart (intmode, tmp[0]),
21425 GEN_INT (31), NULL_RTX, 0,
21426 OPTAB_DIRECT);
21427 else
21428 {
21429 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
21430 two31 = ix86_build_const_vector (intmode, 1, two31);
21431 *xorp = expand_simple_binop (intmode, AND,
21432 gen_lowpart (intmode, tmp[0]),
21433 two31, NULL_RTX, 0,
21434 OPTAB_DIRECT);
21435 }
21436 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
21437 0, OPTAB_DIRECT);
21438 }
21439
21440 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
21441 then replicate the value for all elements of the vector
21442 register. */
21443
21444 rtx
21445 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
21446 {
21447 int i, n_elt;
21448 rtvec v;
21449 machine_mode scalar_mode;
21450
21451 switch (mode)
21452 {
21453 case E_V64QImode:
21454 case E_V32QImode:
21455 case E_V16QImode:
21456 case E_V32HImode:
21457 case E_V16HImode:
21458 case E_V8HImode:
21459 case E_V16SImode:
21460 case E_V8SImode:
21461 case E_V4SImode:
21462 case E_V8DImode:
21463 case E_V4DImode:
21464 case E_V2DImode:
21465 gcc_assert (vect);
21466 /* FALLTHRU */
21467 case E_V16SFmode:
21468 case E_V8SFmode:
21469 case E_V4SFmode:
21470 case E_V8DFmode:
21471 case E_V4DFmode:
21472 case E_V2DFmode:
21473 n_elt = GET_MODE_NUNITS (mode);
21474 v = rtvec_alloc (n_elt);
21475 scalar_mode = GET_MODE_INNER (mode);
21476
21477 RTVEC_ELT (v, 0) = value;
21478
21479 for (i = 1; i < n_elt; ++i)
21480 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
21481
21482 return gen_rtx_CONST_VECTOR (mode, v);
21483
21484 default:
21485 gcc_unreachable ();
21486 }
21487 }
21488
21489 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
21490 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
21491 for an SSE register. If VECT is true, then replicate the mask for
21492 all elements of the vector register. If INVERT is true, then create
21493 a mask excluding the sign bit. */
21494
21495 rtx
21496 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
21497 {
21498 machine_mode vec_mode, imode;
21499 wide_int w;
21500 rtx mask, v;
21501
21502 switch (mode)
21503 {
21504 case E_V16SImode:
21505 case E_V16SFmode:
21506 case E_V8SImode:
21507 case E_V4SImode:
21508 case E_V8SFmode:
21509 case E_V4SFmode:
21510 vec_mode = mode;
21511 imode = SImode;
21512 break;
21513
21514 case E_V8DImode:
21515 case E_V4DImode:
21516 case E_V2DImode:
21517 case E_V8DFmode:
21518 case E_V4DFmode:
21519 case E_V2DFmode:
21520 vec_mode = mode;
21521 imode = DImode;
21522 break;
21523
21524 case E_TImode:
21525 case E_TFmode:
21526 vec_mode = VOIDmode;
21527 imode = TImode;
21528 break;
21529
21530 default:
21531 gcc_unreachable ();
21532 }
21533
21534 machine_mode inner_mode = GET_MODE_INNER (mode);
21535 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
21536 GET_MODE_BITSIZE (inner_mode));
21537 if (invert)
21538 w = wi::bit_not (w);
21539
21540 /* Force this value into the low part of a fp vector constant. */
21541 mask = immed_wide_int_const (w, imode);
21542 mask = gen_lowpart (inner_mode, mask);
21543
21544 if (vec_mode == VOIDmode)
21545 return force_reg (inner_mode, mask);
21546
21547 v = ix86_build_const_vector (vec_mode, vect, mask);
21548 return force_reg (vec_mode, v);
21549 }
21550
21551 /* Generate code for floating point ABS or NEG. */
21552
21553 void
21554 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
21555 rtx operands[])
21556 {
21557 rtx mask, set, dst, src;
21558 bool use_sse = false;
21559 bool vector_mode = VECTOR_MODE_P (mode);
21560 machine_mode vmode = mode;
21561
21562 if (vector_mode)
21563 use_sse = true;
21564 else if (mode == TFmode)
21565 use_sse = true;
21566 else if (TARGET_SSE_MATH)
21567 {
21568 use_sse = SSE_FLOAT_MODE_P (mode);
21569 if (mode == SFmode)
21570 vmode = V4SFmode;
21571 else if (mode == DFmode)
21572 vmode = V2DFmode;
21573 }
21574
21575 /* NEG and ABS performed with SSE use bitwise mask operations.
21576 Create the appropriate mask now. */
21577 if (use_sse)
21578 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
21579 else
21580 mask = NULL_RTX;
21581
21582 dst = operands[0];
21583 src = operands[1];
21584
21585 set = gen_rtx_fmt_e (code, mode, src);
21586 set = gen_rtx_SET (dst, set);
21587
21588 if (mask)
21589 {
21590 rtx use, clob;
21591 rtvec par;
21592
21593 use = gen_rtx_USE (VOIDmode, mask);
21594 if (vector_mode)
21595 par = gen_rtvec (2, set, use);
21596 else
21597 {
21598 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21599 par = gen_rtvec (3, set, use, clob);
21600 }
21601 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
21602 }
21603 else
21604 emit_insn (set);
21605 }
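
/* For illustration (the AND/XOR itself is performed by the insn patterns
   that match the RTL emitted above): negation uses an XOR with the
   sign-bit mask and ABS an AND with the inverted mask, e.g. for SFmode

     bits ^ 0x80000000   negates,
     bits & 0x7fffffff   takes the absolute value,

   with the scalar value living in the low element of a V4SFmode register
   and the mask built by ix86_build_signbit_mask.  */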
21606
21607 /* Expand a copysign operation. Special case operand 0 being a constant. */
21608
21609 void
21610 ix86_expand_copysign (rtx operands[])
21611 {
21612 machine_mode mode, vmode;
21613 rtx dest, op0, op1, mask, nmask;
21614
21615 dest = operands[0];
21616 op0 = operands[1];
21617 op1 = operands[2];
21618
21619 mode = GET_MODE (dest);
21620
21621 if (mode == SFmode)
21622 vmode = V4SFmode;
21623 else if (mode == DFmode)
21624 vmode = V2DFmode;
21625 else
21626 vmode = mode;
21627
21628 if (CONST_DOUBLE_P (op0))
21629 {
21630 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
21631
21632 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
21633 op0 = simplify_unary_operation (ABS, mode, op0, mode);
21634
21635 if (mode == SFmode || mode == DFmode)
21636 {
21637 if (op0 == CONST0_RTX (mode))
21638 op0 = CONST0_RTX (vmode);
21639 else
21640 {
21641 rtx v = ix86_build_const_vector (vmode, false, op0);
21642
21643 op0 = force_reg (vmode, v);
21644 }
21645 }
21646 else if (op0 != CONST0_RTX (mode))
21647 op0 = force_reg (mode, op0);
21648
21649 mask = ix86_build_signbit_mask (vmode, 0, 0);
21650
21651 if (mode == SFmode)
21652 copysign_insn = gen_copysignsf3_const;
21653 else if (mode == DFmode)
21654 copysign_insn = gen_copysigndf3_const;
21655 else
21656 copysign_insn = gen_copysigntf3_const;
21657
21658 emit_insn (copysign_insn (dest, op0, op1, mask));
21659 }
21660 else
21661 {
21662 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
21663
21664 nmask = ix86_build_signbit_mask (vmode, 0, 1);
21665 mask = ix86_build_signbit_mask (vmode, 0, 0);
21666
21667 if (mode == SFmode)
21668 copysign_insn = gen_copysignsf3_var;
21669 else if (mode == DFmode)
21670 copysign_insn = gen_copysigndf3_var;
21671 else
21672 copysign_insn = gen_copysigntf3_var;
21673
21674 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
21675 }
21676 }
21677
21678 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
21679 be a constant, and so has already been expanded into a vector constant. */
21680
21681 void
21682 ix86_split_copysign_const (rtx operands[])
21683 {
21684 machine_mode mode, vmode;
21685 rtx dest, op0, mask, x;
21686
21687 dest = operands[0];
21688 op0 = operands[1];
21689 mask = operands[3];
21690
21691 mode = GET_MODE (dest);
21692 vmode = GET_MODE (mask);
21693
21694 dest = lowpart_subreg (vmode, dest, mode);
21695 x = gen_rtx_AND (vmode, dest, mask);
21696 emit_insn (gen_rtx_SET (dest, x));
21697
21698 if (op0 != CONST0_RTX (vmode))
21699 {
21700 x = gen_rtx_IOR (vmode, dest, op0);
21701 emit_insn (gen_rtx_SET (dest, x));
21702 }
21703 }
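
/* Scalar sketch of the mask arithmetic (SFmode shown; <string.h>
   assumed):

     float
     copysign_via_masks (float mag, float sgn)
     {
       unsigned int m, s;

       memcpy (&m, &mag, sizeof m);
       memcpy (&s, &sgn, sizeof s);
       m = (m & 0x7fffffffu) | (s & 0x80000000u);
       memcpy (&mag, &m, sizeof mag);
       return mag;
     }

   In the constant case above the magnitude has already been made
   non-negative, so only the sign source needs the AND with the sign-bit
   mask, and the magnitude is merged in with a plain IOR.  */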
21704
21705 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
21706 so we have to do two masks. */
21707
21708 void
21709 ix86_split_copysign_var (rtx operands[])
21710 {
21711 machine_mode mode, vmode;
21712 rtx dest, scratch, op0, op1, mask, nmask, x;
21713
21714 dest = operands[0];
21715 scratch = operands[1];
21716 op0 = operands[2];
21717 op1 = operands[3];
21718 nmask = operands[4];
21719 mask = operands[5];
21720
21721 mode = GET_MODE (dest);
21722 vmode = GET_MODE (mask);
21723
21724 if (rtx_equal_p (op0, op1))
21725 {
21726 /* Shouldn't happen often (it's useless, obviously), but when it does
21727 we'd generate incorrect code if we continue below. */
21728 emit_move_insn (dest, op0);
21729 return;
21730 }
21731
21732 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21733 {
21734 gcc_assert (REGNO (op1) == REGNO (scratch));
21735
21736 x = gen_rtx_AND (vmode, scratch, mask);
21737 emit_insn (gen_rtx_SET (scratch, x));
21738
21739 dest = mask;
21740 op0 = lowpart_subreg (vmode, op0, mode);
21741 x = gen_rtx_NOT (vmode, dest);
21742 x = gen_rtx_AND (vmode, x, op0);
21743 emit_insn (gen_rtx_SET (dest, x));
21744 }
21745 else
21746 {
21747 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21748 {
21749 x = gen_rtx_AND (vmode, scratch, mask);
21750 }
21751 else /* alternative 2,4 */
21752 {
21753 gcc_assert (REGNO (mask) == REGNO (scratch));
21754 op1 = lowpart_subreg (vmode, op1, mode);
21755 x = gen_rtx_AND (vmode, scratch, op1);
21756 }
21757 emit_insn (gen_rtx_SET (scratch, x));
21758
21759 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21760 {
21761 dest = lowpart_subreg (vmode, op0, mode);
21762 x = gen_rtx_AND (vmode, dest, nmask);
21763 }
21764 else /* alternative 3,4 */
21765 {
21766 gcc_assert (REGNO (nmask) == REGNO (dest));
21767 dest = nmask;
21768 op0 = lowpart_subreg (vmode, op0, mode);
21769 x = gen_rtx_AND (vmode, dest, op0);
21770 }
21771 emit_insn (gen_rtx_SET (dest, x));
21772 }
21773
21774 x = gen_rtx_IOR (vmode, dest, scratch);
21775 emit_insn (gen_rtx_SET (dest, x));
21776 }
21777
21778 /* Return TRUE or FALSE depending on whether the first SET in INSN
21779 has source and destination with matching CC modes, and that the
21780 CC mode is at least as constrained as REQ_MODE. */
21781
21782 bool
21783 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21784 {
21785 rtx set;
21786 machine_mode set_mode;
21787
21788 set = PATTERN (insn);
21789 if (GET_CODE (set) == PARALLEL)
21790 set = XVECEXP (set, 0, 0);
21791 gcc_assert (GET_CODE (set) == SET);
21792 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21793
21794 set_mode = GET_MODE (SET_DEST (set));
21795 switch (set_mode)
21796 {
21797 case E_CCNOmode:
21798 if (req_mode != CCNOmode
21799 && (req_mode != CCmode
21800 || XEXP (SET_SRC (set), 1) != const0_rtx))
21801 return false;
21802 break;
21803 case E_CCmode:
21804 if (req_mode == CCGCmode)
21805 return false;
21806 /* FALLTHRU */
21807 case E_CCGCmode:
21808 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21809 return false;
21810 /* FALLTHRU */
21811 case E_CCGOCmode:
21812 if (req_mode == CCZmode)
21813 return false;
21814 /* FALLTHRU */
21815 case E_CCZmode:
21816 break;
21817
21818 case E_CCGZmode:
21819
21820 case E_CCAmode:
21821 case E_CCCmode:
21822 case E_CCOmode:
21823 case E_CCPmode:
21824 case E_CCSmode:
21825 if (set_mode != req_mode)
21826 return false;
21827 break;
21828
21829 default:
21830 gcc_unreachable ();
21831 }
21832
21833 return GET_MODE (SET_SRC (set)) == set_mode;
21834 }
21835
21836 /* Generate insn patterns to do an integer compare of OPERANDS. */
21837
21838 static rtx
21839 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21840 {
21841 machine_mode cmpmode;
21842 rtx tmp, flags;
21843
21844 cmpmode = SELECT_CC_MODE (code, op0, op1);
21845 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21846
21847 /* This is very simple, but making the interface the same as in the
21848 FP case makes the rest of the code easier. */
21849 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21850 emit_insn (gen_rtx_SET (flags, tmp));
21851
21852 /* Return the test that should be put into the flags user, i.e.
21853 the bcc, scc, or cmov instruction. */
21854 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21855 }
21856
21857 /* Figure out whether to use unordered fp comparisons. */
21858
21859 static bool
21860 ix86_unordered_fp_compare (enum rtx_code code)
21861 {
21862 if (!TARGET_IEEE_FP)
21863 return false;
21864
21865 switch (code)
21866 {
21867 case GT:
21868 case GE:
21869 case LT:
21870 case LE:
21871 return false;
21872
21873 case EQ:
21874 case NE:
21875
21876 case LTGT:
21877 case UNORDERED:
21878 case ORDERED:
21879 case UNLT:
21880 case UNLE:
21881 case UNGT:
21882 case UNGE:
21883 case UNEQ:
21884 return true;
21885
21886 default:
21887 gcc_unreachable ();
21888 }
21889 }
21890
21891 machine_mode
21892 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21893 {
21894 machine_mode mode = GET_MODE (op0);
21895
21896 if (SCALAR_FLOAT_MODE_P (mode))
21897 {
21898 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21899 return CCFPmode;
21900 }
21901
21902 switch (code)
21903 {
21904 /* Only zero flag is needed. */
21905 case EQ: /* ZF=0 */
21906 case NE: /* ZF!=0 */
21907 return CCZmode;
21908 /* Codes needing carry flag. */
21909 case GEU: /* CF=0 */
21910 case LTU: /* CF=1 */
21911 /* Detect overflow checks. They need just the carry flag. */
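      /* For example, "(a + b) < a" (or "< b") tests for unsigned overflow of
	 the addition; only the carry flag matters, so CCCmode suffices.  */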
21912 if (GET_CODE (op0) == PLUS
21913 && (rtx_equal_p (op1, XEXP (op0, 0))
21914 || rtx_equal_p (op1, XEXP (op0, 1))))
21915 return CCCmode;
21916 else
21917 return CCmode;
21918 case GTU: /* CF=0 & ZF=0 */
21919 case LEU: /* CF=1 | ZF=1 */
21920 return CCmode;
21921 /* Codes possibly doable only with sign flag when
21922 comparing against zero. */
21923 case GE: /* SF=OF or SF=0 */
21924 case LT: /* SF<>OF or SF=1 */
21925 if (op1 == const0_rtx)
21926 return CCGOCmode;
21927 else
21928 /* For other cases Carry flag is not required. */
21929 return CCGCmode;
21930 /* Codes doable only with the sign flag when comparing
21931 against zero, but for which we lack a jump instruction,
21932 so we need to use relational tests against overflow,
21933 which thus needs to be zero. */
21934 case GT: /* ZF=0 & SF=OF */
21935 case LE: /* ZF=1 | SF<>OF */
21936 if (op1 == const0_rtx)
21937 return CCNOmode;
21938 else
21939 return CCGCmode;
21940 /* The strcmp pattern does (use flags) and combine may ask us for the
21941 proper mode. */
21942 case USE:
21943 return CCmode;
21944 default:
21945 gcc_unreachable ();
21946 }
21947 }
21948
21949 /* Return the fixed registers used for condition codes. */
21950
21951 static bool
21952 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21953 {
21954 *p1 = FLAGS_REG;
21955 *p2 = FPSR_REG;
21956 return true;
21957 }
21958
21959 /* If two condition code modes are compatible, return a condition code
21960 mode which is compatible with both. Otherwise, return
21961 VOIDmode. */
21962
21963 static machine_mode
21964 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
21965 {
21966 if (m1 == m2)
21967 return m1;
21968
21969 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
21970 return VOIDmode;
21971
21972 if ((m1 == CCGCmode && m2 == CCGOCmode)
21973 || (m1 == CCGOCmode && m2 == CCGCmode))
21974 return CCGCmode;
21975
21976 if ((m1 == CCNOmode && m2 == CCGOCmode)
21977 || (m1 == CCGOCmode && m2 == CCNOmode))
21978 return CCNOmode;
21979
21980 if (m1 == CCZmode
21981 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
21982 return m2;
21983 else if (m2 == CCZmode
21984 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
21985 return m1;
21986
21987 switch (m1)
21988 {
21989 default:
21990 gcc_unreachable ();
21991
21992 case E_CCmode:
21993 case E_CCGCmode:
21994 case E_CCGOCmode:
21995 case E_CCNOmode:
21996 case E_CCAmode:
21997 case E_CCCmode:
21998 case E_CCOmode:
21999 case E_CCPmode:
22000 case E_CCSmode:
22001 case E_CCZmode:
22002 switch (m2)
22003 {
22004 default:
22005 return VOIDmode;
22006
22007 case E_CCmode:
22008 case E_CCGCmode:
22009 case E_CCGOCmode:
22010 case E_CCNOmode:
22011 case E_CCAmode:
22012 case E_CCCmode:
22013 case E_CCOmode:
22014 case E_CCPmode:
22015 case E_CCSmode:
22016 case E_CCZmode:
22017 return CCmode;
22018 }
22019
22020 case E_CCFPmode:
22021 /* These are only compatible with themselves, which we already
22022 checked above. */
22023 return VOIDmode;
22024 }
22025 }
22026
22027
22028 /* Return a comparison we can do that is equivalent to
22029 swap_condition (code), apart possibly from orderedness.
22030 But never change orderedness if TARGET_IEEE_FP, returning
22031 UNKNOWN in that case if necessary. */
22032
22033 static enum rtx_code
22034 ix86_fp_swap_condition (enum rtx_code code)
22035 {
22036 switch (code)
22037 {
22038 case GT: /* GTU - CF=0 & ZF=0 */
22039 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
22040 case GE: /* GEU - CF=0 */
22041 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
22042 case UNLT: /* LTU - CF=1 */
22043 return TARGET_IEEE_FP ? UNKNOWN : GT;
22044 case UNLE: /* LEU - CF=1 | ZF=1 */
22045 return TARGET_IEEE_FP ? UNKNOWN : GE;
22046 default:
22047 return swap_condition (code);
22048 }
22049 }
22050
22051 /* Return the cost of comparison CODE using the best strategy for performance.
22052 All following functions use the number of instructions as a cost metric.
22053 In the future this should be tweaked to compute bytes for optimize_size and
22054 to take into account the performance of various instructions on various CPUs. */
22055
22056 static int
22057 ix86_fp_comparison_cost (enum rtx_code code)
22058 {
22059 int arith_cost;
22060
22061 /* The cost of code using bit-twiddling on %ah. */
22062 switch (code)
22063 {
22064 case UNLE:
22065 case UNLT:
22066 case LTGT:
22067 case GT:
22068 case GE:
22069 case UNORDERED:
22070 case ORDERED:
22071 case UNEQ:
22072 arith_cost = 4;
22073 break;
22074 case LT:
22075 case NE:
22076 case EQ:
22077 case UNGE:
22078 arith_cost = TARGET_IEEE_FP ? 5 : 4;
22079 break;
22080 case LE:
22081 case UNGT:
22082 arith_cost = TARGET_IEEE_FP ? 6 : 4;
22083 break;
22084 default:
22085 gcc_unreachable ();
22086 }
22087
22088 switch (ix86_fp_comparison_strategy (code))
22089 {
22090 case IX86_FPCMP_COMI:
22091 return arith_cost > 4 ? 3 : 2;
22092 case IX86_FPCMP_SAHF:
22093 return arith_cost > 4 ? 4 : 3;
22094 default:
22095 return arith_cost;
22096 }
22097 }
22098
22099 /* Return the strategy to use for floating-point comparisons. We assume fcomi
22100 is always preferable where available, since that is also true when looking
22101 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
22102
22103 enum ix86_fpcmp_strategy
22104 ix86_fp_comparison_strategy (enum rtx_code)
22105 {
22106 /* Do fcomi/sahf based test when profitable. */
22107
22108 if (TARGET_CMOVE)
22109 return IX86_FPCMP_COMI;
22110
22111 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
22112 return IX86_FPCMP_SAHF;
22113
22114 return IX86_FPCMP_ARITH;
22115 }
22116
22117 /* Swap, force into registers, or otherwise massage the two operands
22118 to a fp comparison. The operands are updated in place; the new
22119 comparison code is returned. */
22120
22121 static enum rtx_code
22122 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
22123 {
22124 bool unordered_compare = ix86_unordered_fp_compare (code);
22125 rtx op0 = *pop0, op1 = *pop1;
22126 machine_mode op_mode = GET_MODE (op0);
22127 bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
22128
22129 /* All of the unordered compare instructions only work on registers.
22130 The same is true of the fcomi compare instructions. The XFmode
22131 compare instructions require registers except when comparing
22132 against zero or when converting operand 1 from fixed point to
22133 floating point. */
22134
22135 if (!is_sse
22136 && (unordered_compare
22137 || (op_mode == XFmode
22138 && ! (standard_80387_constant_p (op0) == 1
22139 || standard_80387_constant_p (op1) == 1)
22140 && GET_CODE (op1) != FLOAT)
22141 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
22142 {
22143 op0 = force_reg (op_mode, op0);
22144 op1 = force_reg (op_mode, op1);
22145 }
22146 else
22147 {
22148 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
22149 things around if they appear profitable, otherwise force op0
22150 into a register. */
22151
22152 if (standard_80387_constant_p (op0) == 0
22153 || (MEM_P (op0)
22154 && ! (standard_80387_constant_p (op1) == 0
22155 || MEM_P (op1))))
22156 {
22157 enum rtx_code new_code = ix86_fp_swap_condition (code);
22158 if (new_code != UNKNOWN)
22159 {
22160 std::swap (op0, op1);
22161 code = new_code;
22162 }
22163 }
22164
22165 if (!REG_P (op0))
22166 op0 = force_reg (op_mode, op0);
22167
22168 if (CONSTANT_P (op1))
22169 {
22170 int tmp = standard_80387_constant_p (op1);
22171 if (tmp == 0)
22172 op1 = validize_mem (force_const_mem (op_mode, op1));
22173 else if (tmp == 1)
22174 {
22175 if (TARGET_CMOVE)
22176 op1 = force_reg (op_mode, op1);
22177 }
22178 else
22179 op1 = force_reg (op_mode, op1);
22180 }
22181 }
22182
22183 /* Try to rearrange the comparison to make it cheaper. */
22184 if (ix86_fp_comparison_cost (code)
22185 > ix86_fp_comparison_cost (swap_condition (code))
22186 && (REG_P (op1) || can_create_pseudo_p ()))
22187 {
22188 std::swap (op0, op1);
22189 code = swap_condition (code);
22190 if (!REG_P (op0))
22191 op0 = force_reg (op_mode, op0);
22192 }
22193
22194 *pop0 = op0;
22195 *pop1 = op1;
22196 return code;
22197 }
22198
22199 /* Convert the comparison codes we use to represent an FP comparison to the
22200 integer code that will result in a proper branch. Return UNKNOWN if no such
22201 code is available. */
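/* The mapping follows from how fcomi (or fnstsw + sahf) places the FPU
   condition bits into CF/PF/ZF: an ordered "greater than" leaves CF=0 and
   ZF=0, exactly the unsigned GTU condition, and similarly for the other
   codes handled below.  */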
22202
22203 enum rtx_code
22204 ix86_fp_compare_code_to_integer (enum rtx_code code)
22205 {
22206 switch (code)
22207 {
22208 case GT:
22209 return GTU;
22210 case GE:
22211 return GEU;
22212 case ORDERED:
22213 case UNORDERED:
22214 return code;
22215 case UNEQ:
22216 return EQ;
22217 case UNLT:
22218 return LTU;
22219 case UNLE:
22220 return LEU;
22221 case LTGT:
22222 return NE;
22223 default:
22224 return UNKNOWN;
22225 }
22226 }
22227
22228 /* Generate insn patterns to do a floating point compare of OPERANDS. */
22229
22230 static rtx
22231 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
22232 {
22233 bool unordered_compare = ix86_unordered_fp_compare (code);
22234 machine_mode intcmp_mode;
22235 rtx tmp, tmp2;
22236
22237 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
22238
22239 /* Do fcomi/sahf based test when profitable. */
22240 switch (ix86_fp_comparison_strategy (code))
22241 {
22242 case IX86_FPCMP_COMI:
22243 intcmp_mode = CCFPmode;
22244 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22245 if (unordered_compare)
22246 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22247 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
22248 break;
22249
22250 case IX86_FPCMP_SAHF:
22251 intcmp_mode = CCFPmode;
22252 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22253 if (unordered_compare)
22254 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22255 tmp = gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp);
22256 if (!scratch)
22257 scratch = gen_reg_rtx (HImode);
22258 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
22259 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
22260 break;
22261
22262 case IX86_FPCMP_ARITH:
22263 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
22264 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22265 if (unordered_compare)
22266 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22267 tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22268 if (!scratch)
22269 scratch = gen_reg_rtx (HImode);
22270 emit_insn (gen_rtx_SET (scratch, tmp));
22271
22272 /* In the unordered case, we have to check C2 for NaN's, which
22273 doesn't happen to work out to anything nice combination-wise.
22274 So do some bit twiddling on the value we've got in AH to come
22275 up with an appropriate set of condition codes. */
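      /* After fnstsw the FPU condition bits C0, C2 and C3 sit in bits 0, 2
	 and 6 of AH, i.e. masks 0x01, 0x04 and 0x40; the constant 0x45 used
	 below therefore tests all three at once.  */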
22276
22277 intcmp_mode = CCNOmode;
22278 switch (code)
22279 {
22280 case GT:
22281 case UNGT:
22282 if (code == GT || !TARGET_IEEE_FP)
22283 {
22284 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22285 code = EQ;
22286 }
22287 else
22288 {
22289 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22290 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22291 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
22292 intcmp_mode = CCmode;
22293 code = GEU;
22294 }
22295 break;
22296 case LT:
22297 case UNLT:
22298 if (code == LT && TARGET_IEEE_FP)
22299 {
22300 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22301 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
22302 intcmp_mode = CCmode;
22303 code = EQ;
22304 }
22305 else
22306 {
22307 emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
22308 code = NE;
22309 }
22310 break;
22311 case GE:
22312 case UNGE:
22313 if (code == GE || !TARGET_IEEE_FP)
22314 {
22315 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
22316 code = EQ;
22317 }
22318 else
22319 {
22320 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22321 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
22322 code = NE;
22323 }
22324 break;
22325 case LE:
22326 case UNLE:
22327 if (code == LE && TARGET_IEEE_FP)
22328 {
22329 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22330 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22331 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22332 intcmp_mode = CCmode;
22333 code = LTU;
22334 }
22335 else
22336 {
22337 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22338 code = NE;
22339 }
22340 break;
22341 case EQ:
22342 case UNEQ:
22343 if (code == EQ && TARGET_IEEE_FP)
22344 {
22345 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22346 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22347 intcmp_mode = CCmode;
22348 code = EQ;
22349 }
22350 else
22351 {
22352 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22353 code = NE;
22354 }
22355 break;
22356 case NE:
22357 case LTGT:
22358 if (code == NE && TARGET_IEEE_FP)
22359 {
22360 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22361 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
22362 GEN_INT (0x40)));
22363 code = NE;
22364 }
22365 else
22366 {
22367 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22368 code = EQ;
22369 }
22370 break;
22371
22372 case UNORDERED:
22373 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22374 code = NE;
22375 break;
22376 case ORDERED:
22377 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22378 code = EQ;
22379 break;
22380
22381 default:
22382 gcc_unreachable ();
22383 }
22384 break;
22385
22386 default:
22387 gcc_unreachable ();
22388 }
22389
22390 /* Return the test that should be put into the flags user, i.e.
22391 the bcc, scc, or cmov instruction. */
22392 return gen_rtx_fmt_ee (code, VOIDmode,
22393 gen_rtx_REG (intcmp_mode, FLAGS_REG),
22394 const0_rtx);
22395 }
22396
22397 static rtx
22398 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
22399 {
22400 rtx ret;
22401
22402 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
22403 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
22404
22405 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
22406 {
22407 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
22408 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22409 }
22410 else
22411 ret = ix86_expand_int_compare (code, op0, op1);
22412
22413 return ret;
22414 }
22415
22416 void
22417 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
22418 {
22419 machine_mode mode = GET_MODE (op0);
22420 rtx tmp;
22421
22422 /* Handle the special case of a vector comparison with a boolean result;
22423 transform it using the ptest instruction. */
22424 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22425 {
22426 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
22427 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
22428
22429 gcc_assert (code == EQ || code == NE);
22430 /* Generate an XOR since we can't check that one operand is a zero vector. */
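      /* ptest with both operands equal to TMP sets ZF exactly when TMP is
	 all zeros, i.e. when op0 == op1, so testing the Z flag with EQ/NE
	 gives the desired branch condition.  */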
22431 tmp = gen_reg_rtx (mode);
22432 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
22433 tmp = gen_lowpart (p_mode, tmp);
22434 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
22435 gen_rtx_UNSPEC (CCmode,
22436 gen_rtvec (2, tmp, tmp),
22437 UNSPEC_PTEST)));
22438 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
22439 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22440 gen_rtx_LABEL_REF (VOIDmode, label),
22441 pc_rtx);
22442 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22443 return;
22444 }
22445
22446 switch (mode)
22447 {
22448 case E_SFmode:
22449 case E_DFmode:
22450 case E_XFmode:
22451 case E_QImode:
22452 case E_HImode:
22453 case E_SImode:
22454 simple:
22455 tmp = ix86_expand_compare (code, op0, op1);
22456 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22457 gen_rtx_LABEL_REF (VOIDmode, label),
22458 pc_rtx);
22459 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22460 return;
22461
22462 case E_DImode:
22463 if (TARGET_64BIT)
22464 goto simple;
22465 /* For a 32-bit target, a DImode comparison may be performed in
22466 SSE registers. To allow this we should avoid the split
22467 into SImode, which is achieved by doing the xor in DImode
22468 and then comparing with zero (which is recognized by the
22469 STV pass). We don't compare using xor when optimizing
22470 for size. */
22471 if (!optimize_insn_for_size_p ()
22472 && TARGET_STV
22473 && (code == EQ || code == NE))
22474 {
22475 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
22476 op1 = const0_rtx;
22477 }
22478 /* FALLTHRU */
22479 case E_TImode:
22480 /* Expand DImode branch into multiple compare+branch. */
22481 {
22482 rtx lo[2], hi[2];
22483 rtx_code_label *label2;
22484 enum rtx_code code1, code2, code3;
22485 machine_mode submode;
22486
22487 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
22488 {
22489 std::swap (op0, op1);
22490 code = swap_condition (code);
22491 }
22492
22493 split_double_mode (mode, &op0, 1, lo+0, hi+0);
22494 split_double_mode (mode, &op1, 1, lo+1, hi+1);
22495
22496 submode = mode == DImode ? SImode : DImode;
22497
22498 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
22499 avoid two branches. This costs one extra insn, so disable when
22500 optimizing for size. */
22501
22502 if ((code == EQ || code == NE)
22503 && (!optimize_insn_for_size_p ()
22504 || hi[1] == const0_rtx || lo[1] == const0_rtx))
22505 {
22506 rtx xor0, xor1;
22507
22508 xor1 = hi[0];
22509 if (hi[1] != const0_rtx)
22510 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
22511 NULL_RTX, 0, OPTAB_WIDEN);
22512
22513 xor0 = lo[0];
22514 if (lo[1] != const0_rtx)
22515 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
22516 NULL_RTX, 0, OPTAB_WIDEN);
22517
22518 tmp = expand_binop (submode, ior_optab, xor1, xor0,
22519 NULL_RTX, 0, OPTAB_WIDEN);
22520
22521 ix86_expand_branch (code, tmp, const0_rtx, label);
22522 return;
22523 }
22524
22525 /* Otherwise, if we are doing a less-than or greater-than-or-equal
22526 comparison, op1 is a constant and its low word is zero, then we can
22527 just examine the high word. Similarly for a low word of -1 with
22528 less-than-or-equal or greater-than. */
22529
22530 if (CONST_INT_P (hi[1]))
22531 switch (code)
22532 {
22533 case LT: case LTU: case GE: case GEU:
22534 if (lo[1] == const0_rtx)
22535 {
22536 ix86_expand_branch (code, hi[0], hi[1], label);
22537 return;
22538 }
22539 break;
22540 case LE: case LEU: case GT: case GTU:
22541 if (lo[1] == constm1_rtx)
22542 {
22543 ix86_expand_branch (code, hi[0], hi[1], label);
22544 return;
22545 }
22546 break;
22547 default:
22548 break;
22549 }
22550
22551 /* Emulate comparisons that do not depend on Zero flag with
22552 double-word subtraction. Note that only Overflow, Sign
22553 and Carry flags are valid, so swap arguments and condition
22554 of comparisons that would otherwise test Zero flag. */
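	 /* For example, an unsigned double-word "a < b" is emitted roughly as
	    "cmp lo(a), lo(b)" followed by "sbb hi(a), hi(b)" into a scratch,
	    and the branch then tests the resulting CCCmode (carry) flags;
	    the signed variants test the CCGZmode flags instead.  */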
22555
22556 switch (code)
22557 {
22558 case LE: case LEU: case GT: case GTU:
22559 std::swap (lo[0], lo[1]);
22560 std::swap (hi[0], hi[1]);
22561 code = swap_condition (code);
22562 /* FALLTHRU */
22563
22564 case LT: case LTU: case GE: case GEU:
22565 {
22566 rtx (*cmp_insn) (rtx, rtx);
22567 rtx (*sbb_insn) (rtx, rtx, rtx);
22568 bool uns = (code == LTU || code == GEU);
22569
22570 if (TARGET_64BIT)
22571 {
22572 cmp_insn = gen_cmpdi_1;
22573 sbb_insn
22574 = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
22575 }
22576 else
22577 {
22578 cmp_insn = gen_cmpsi_1;
22579 sbb_insn
22580 = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
22581 }
22582
22583 if (!nonimmediate_operand (lo[0], submode))
22584 lo[0] = force_reg (submode, lo[0]);
22585 if (!x86_64_general_operand (lo[1], submode))
22586 lo[1] = force_reg (submode, lo[1]);
22587
22588 if (!register_operand (hi[0], submode))
22589 hi[0] = force_reg (submode, hi[0]);
22590 if ((uns && !nonimmediate_operand (hi[1], submode))
22591 || (!uns && !x86_64_general_operand (hi[1], submode)))
22592 hi[1] = force_reg (submode, hi[1]);
22593
22594 emit_insn (cmp_insn (lo[0], lo[1]));
22595 emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
22596
22597 tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
22598
22599 ix86_expand_branch (code, tmp, const0_rtx, label);
22600 return;
22601 }
22602
22603 default:
22604 break;
22605 }
22606
22607 /* Otherwise, we need two or three jumps. */
22608
22609 label2 = gen_label_rtx ();
22610
22611 code1 = code;
22612 code2 = swap_condition (code);
22613 code3 = unsigned_condition (code);
22614
22615 switch (code)
22616 {
22617 case LT: case GT: case LTU: case GTU:
22618 break;
22619
22620 case LE: code1 = LT; code2 = GT; break;
22621 case GE: code1 = GT; code2 = LT; break;
22622 case LEU: code1 = LTU; code2 = GTU; break;
22623 case GEU: code1 = GTU; code2 = LTU; break;
22624
22625 case EQ: code1 = UNKNOWN; code2 = NE; break;
22626 case NE: code2 = UNKNOWN; break;
22627
22628 default:
22629 gcc_unreachable ();
22630 }
22631
22632 /*
22633 * a < b =>
22634 * if (hi(a) < hi(b)) goto true;
22635 * if (hi(a) > hi(b)) goto false;
22636 * if (lo(a) < lo(b)) goto true;
22637 * false:
22638 */
22639
22640 if (code1 != UNKNOWN)
22641 ix86_expand_branch (code1, hi[0], hi[1], label);
22642 if (code2 != UNKNOWN)
22643 ix86_expand_branch (code2, hi[0], hi[1], label2);
22644
22645 ix86_expand_branch (code3, lo[0], lo[1], label);
22646
22647 if (code2 != UNKNOWN)
22648 emit_label (label2);
22649 return;
22650 }
22651
22652 default:
22653 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
22654 goto simple;
22655 }
22656 }
22657
22658 void
22659 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22660 {
22661 rtx ret;
22662
22663 gcc_assert (GET_MODE (dest) == QImode);
22664
22665 ret = ix86_expand_compare (code, op0, op1);
22666 PUT_MODE (ret, QImode);
22667 emit_insn (gen_rtx_SET (dest, ret));
22668 }
22669
22670 /* Expand a comparison setting or clearing the carry flag. Return true when
22671 successful and set *POP to the resulting comparison operation. */
22672 static bool
22673 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
22674 {
22675 machine_mode mode =
22676 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
22677
22678 /* Do not handle double-mode compares that go through the special path. */
22679 if (mode == (TARGET_64BIT ? TImode : DImode))
22680 return false;
22681
22682 if (SCALAR_FLOAT_MODE_P (mode))
22683 {
22684 rtx compare_op;
22685 rtx_insn *compare_seq;
22686
22687 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22688
22689 /* Shortcut: the following common codes never translate
22690 into carry-flag compares. */
22691 if (code == EQ || code == NE || code == UNEQ || code == LTGT
22692 || code == ORDERED || code == UNORDERED)
22693 return false;
22694
22695 /* These comparisons require zero flag; swap operands so they won't. */
22696 if ((code == GT || code == UNLE || code == LE || code == UNGT)
22697 && !TARGET_IEEE_FP)
22698 {
22699 std::swap (op0, op1);
22700 code = swap_condition (code);
22701 }
22702
22703 /* Try to expand the comparison and verify that we end up with
22704 a carry-flag-based comparison. This fails only when we decide
22705 to expand the comparison using arithmetic, which is not a
22706 common scenario. */
22707 start_sequence ();
22708 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22709 compare_seq = get_insns ();
22710 end_sequence ();
22711
22712 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
22713 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
22714 else
22715 code = GET_CODE (compare_op);
22716
22717 if (code != LTU && code != GEU)
22718 return false;
22719
22720 emit_insn (compare_seq);
22721 *pop = compare_op;
22722 return true;
22723 }
22724
22725 if (!INTEGRAL_MODE_P (mode))
22726 return false;
22727
22728 switch (code)
22729 {
22730 case LTU:
22731 case GEU:
22732 break;
22733
22734 /* Convert a==0 into (unsigned)a<1. */
22735 case EQ:
22736 case NE:
22737 if (op1 != const0_rtx)
22738 return false;
22739 op1 = const1_rtx;
22740 code = (code == EQ ? LTU : GEU);
22741 break;
22742
22743 /* Convert a>b into b<a or a>=b+1. */
22744 case GTU:
22745 case LEU:
22746 if (CONST_INT_P (op1))
22747 {
22748 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
22749 /* Bail out on overflow. We could still swap the operands, but that
22750 would force loading the constant into a register. */
22751 if (op1 == const0_rtx
22752 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
22753 return false;
22754 code = (code == GTU ? GEU : LTU);
22755 }
22756 else
22757 {
22758 std::swap (op0, op1);
22759 code = (code == GTU ? LTU : GEU);
22760 }
22761 break;
22762
22763 /* Convert a>=0 into (unsigned)a<0x80000000. */
22764 case LT:
22765 case GE:
22766 if (mode == DImode || op1 != const0_rtx)
22767 return false;
22768 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22769 code = (code == LT ? GEU : LTU);
22770 break;
22771 case LE:
22772 case GT:
22773 if (mode == DImode || op1 != constm1_rtx)
22774 return false;
22775 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22776 code = (code == LE ? GEU : LTU);
22777 break;
22778
22779 default:
22780 return false;
22781 }
22782 /* Swapping operands may cause a constant to appear as the first operand. */
22783 if (!nonimmediate_operand (op0, VOIDmode))
22784 {
22785 if (!can_create_pseudo_p ())
22786 return false;
22787 op0 = force_reg (mode, op0);
22788 }
22789 *pop = ix86_expand_compare (code, op0, op1);
22790 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
22791 return true;
22792 }
22793
22794 bool
22795 ix86_expand_int_movcc (rtx operands[])
22796 {
22797 enum rtx_code code = GET_CODE (operands[1]), compare_code;
22798 rtx_insn *compare_seq;
22799 rtx compare_op;
22800 machine_mode mode = GET_MODE (operands[0]);
22801 bool sign_bit_compare_p = false;
22802 rtx op0 = XEXP (operands[1], 0);
22803 rtx op1 = XEXP (operands[1], 1);
22804
22805 if (GET_MODE (op0) == TImode
22806 || (GET_MODE (op0) == DImode
22807 && !TARGET_64BIT))
22808 return false;
22809
22810 start_sequence ();
22811 compare_op = ix86_expand_compare (code, op0, op1);
22812 compare_seq = get_insns ();
22813 end_sequence ();
22814
22815 compare_code = GET_CODE (compare_op);
22816
22817 if ((op1 == const0_rtx && (code == GE || code == LT))
22818 || (op1 == constm1_rtx && (code == GT || code == LE)))
22819 sign_bit_compare_p = true;
22820
22821 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22822 HImode insns, we'd be swallowed in word prefix ops. */
22823
22824 if ((mode != HImode || TARGET_FAST_PREFIX)
22825 && (mode != (TARGET_64BIT ? TImode : DImode))
22826 && CONST_INT_P (operands[2])
22827 && CONST_INT_P (operands[3]))
22828 {
22829 rtx out = operands[0];
22830 HOST_WIDE_INT ct = INTVAL (operands[2]);
22831 HOST_WIDE_INT cf = INTVAL (operands[3]);
22832 HOST_WIDE_INT diff;
22833
22834 diff = ct - cf;
22835 /* Sign-bit compares are better done using shifts than by using
22836 sbb. */
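      /* In both cases the comparison is first materialized as a 0/-1 mask,
	 either with sbb or with a sign-extending emit_store_flag (NORMALIZEP
	 of -1); the arithmetic below then maps that mask onto ct/cf.  */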
22837 if (sign_bit_compare_p
22838 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22839 {
22840 /* Detect overlap between destination and compare sources. */
22841 rtx tmp = out;
22842
22843 if (!sign_bit_compare_p)
22844 {
22845 rtx flags;
22846 bool fpcmp = false;
22847
22848 compare_code = GET_CODE (compare_op);
22849
22850 flags = XEXP (compare_op, 0);
22851
22852 if (GET_MODE (flags) == CCFPmode)
22853 {
22854 fpcmp = true;
22855 compare_code
22856 = ix86_fp_compare_code_to_integer (compare_code);
22857 }
22858
22859 /* To simplify rest of code, restrict to the GEU case. */
22860 if (compare_code == LTU)
22861 {
22862 std::swap (ct, cf);
22863 compare_code = reverse_condition (compare_code);
22864 code = reverse_condition (code);
22865 }
22866 else
22867 {
22868 if (fpcmp)
22869 PUT_CODE (compare_op,
22870 reverse_condition_maybe_unordered
22871 (GET_CODE (compare_op)));
22872 else
22873 PUT_CODE (compare_op,
22874 reverse_condition (GET_CODE (compare_op)));
22875 }
22876 diff = ct - cf;
22877
22878 if (reg_overlap_mentioned_p (out, op0)
22879 || reg_overlap_mentioned_p (out, op1))
22880 tmp = gen_reg_rtx (mode);
22881
22882 if (mode == DImode)
22883 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22884 else
22885 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22886 flags, compare_op));
22887 }
22888 else
22889 {
22890 if (code == GT || code == GE)
22891 code = reverse_condition (code);
22892 else
22893 {
22894 std::swap (ct, cf);
22895 diff = ct - cf;
22896 }
22897 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22898 }
22899
22900 if (diff == 1)
22901 {
22902 /*
22903 * cmpl op0,op1
22904 * sbbl dest,dest
22905 * [addl dest, ct]
22906 *
22907 * Size 5 - 8.
22908 */
22909 if (ct)
22910 tmp = expand_simple_binop (mode, PLUS,
22911 tmp, GEN_INT (ct),
22912 copy_rtx (tmp), 1, OPTAB_DIRECT);
22913 }
22914 else if (cf == -1)
22915 {
22916 /*
22917 * cmpl op0,op1
22918 * sbbl dest,dest
22919 * orl $ct, dest
22920 *
22921 * Size 8.
22922 */
22923 tmp = expand_simple_binop (mode, IOR,
22924 tmp, GEN_INT (ct),
22925 copy_rtx (tmp), 1, OPTAB_DIRECT);
22926 }
22927 else if (diff == -1 && ct)
22928 {
22929 /*
22930 * cmpl op0,op1
22931 * sbbl dest,dest
22932 * notl dest
22933 * [addl dest, cf]
22934 *
22935 * Size 8 - 11.
22936 */
22937 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22938 if (cf)
22939 tmp = expand_simple_binop (mode, PLUS,
22940 copy_rtx (tmp), GEN_INT (cf),
22941 copy_rtx (tmp), 1, OPTAB_DIRECT);
22942 }
22943 else
22944 {
22945 /*
22946 * cmpl op0,op1
22947 * sbbl dest,dest
22948 * [notl dest]
22949 * andl cf - ct, dest
22950 * [addl dest, ct]
22951 *
22952 * Size 8 - 11.
22953 */
22954
22955 if (cf == 0)
22956 {
22957 cf = ct;
22958 ct = 0;
22959 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22960 }
22961
22962 tmp = expand_simple_binop (mode, AND,
22963 copy_rtx (tmp),
22964 gen_int_mode (cf - ct, mode),
22965 copy_rtx (tmp), 1, OPTAB_DIRECT);
22966 if (ct)
22967 tmp = expand_simple_binop (mode, PLUS,
22968 copy_rtx (tmp), GEN_INT (ct),
22969 copy_rtx (tmp), 1, OPTAB_DIRECT);
22970 }
22971
22972 if (!rtx_equal_p (tmp, out))
22973 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22974
22975 return true;
22976 }
22977
22978 if (diff < 0)
22979 {
22980 machine_mode cmp_mode = GET_MODE (op0);
22981 enum rtx_code new_code;
22982
22983 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22984 {
22985 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22986
22987 /* We may be reversing an unordered compare to a normal compare, which
22988 is not valid in general (we may convert a non-trapping condition
22989 to a trapping one); however, on i386 we currently emit all
22990 comparisons unordered. */
22991 new_code = reverse_condition_maybe_unordered (code);
22992 }
22993 else
22994 new_code = ix86_reverse_condition (code, cmp_mode);
22995 if (new_code != UNKNOWN)
22996 {
22997 std::swap (ct, cf);
22998 diff = -diff;
22999 code = new_code;
23000 }
23001 }
23002
23003 compare_code = UNKNOWN;
23004 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
23005 && CONST_INT_P (op1))
23006 {
23007 if (op1 == const0_rtx
23008 && (code == LT || code == GE))
23009 compare_code = code;
23010 else if (op1 == constm1_rtx)
23011 {
23012 if (code == LE)
23013 compare_code = LT;
23014 else if (code == GT)
23015 compare_code = GE;
23016 }
23017 }
23018
23019 /* Optimize dest = (op0 < 0) ? -1 : cf. */
23020 if (compare_code != UNKNOWN
23021 && GET_MODE (op0) == GET_MODE (out)
23022 && (cf == -1 || ct == -1))
23023 {
23024 /* If lea code below could be used, only optimize
23025 if it results in a 2 insn sequence. */
23026
23027 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
23028 || diff == 3 || diff == 5 || diff == 9)
23029 || (compare_code == LT && ct == -1)
23030 || (compare_code == GE && cf == -1))
23031 {
23032 /*
23033 * notl op1 (if necessary)
23034 * sarl $31, op1
23035 * orl cf, op1
23036 */
23037 if (ct != -1)
23038 {
23039 cf = ct;
23040 ct = -1;
23041 code = reverse_condition (code);
23042 }
23043
23044 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23045
23046 out = expand_simple_binop (mode, IOR,
23047 out, GEN_INT (cf),
23048 out, 1, OPTAB_DIRECT);
23049 if (out != operands[0])
23050 emit_move_insn (operands[0], out);
23051
23052 return true;
23053 }
23054 }
23055
23056
23057 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
23058 || diff == 3 || diff == 5 || diff == 9)
23059 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
23060 && (mode != DImode
23061 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
23062 {
23063 /*
23064 * xorl dest,dest
23065 * cmpl op1,op2
23066 * setcc dest
23067 * lea cf(dest*(ct-cf)),dest
23068 *
23069 * Size 14.
23070 *
23071 * This also catches the degenerate setcc-only case.
23072 */
23073
23074 rtx tmp;
23075 int nops;
23076
23077 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23078
23079 nops = 0;
23080 /* On x86_64 the lea instruction operates on Pmode, so we need
23081 to get the arithmetic done in the proper mode to match. */
23082 if (diff == 1)
23083 tmp = copy_rtx (out);
23084 else
23085 {
23086 rtx out1;
23087 out1 = copy_rtx (out);
23088 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
23089 nops++;
23090 if (diff & 1)
23091 {
23092 tmp = gen_rtx_PLUS (mode, tmp, out1);
23093 nops++;
23094 }
23095 }
23096 if (cf != 0)
23097 {
23098 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
23099 nops++;
23100 }
23101 if (!rtx_equal_p (tmp, out))
23102 {
23103 if (nops == 1)
23104 out = force_operand (tmp, copy_rtx (out));
23105 else
23106 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
23107 }
23108 if (!rtx_equal_p (out, operands[0]))
23109 emit_move_insn (operands[0], copy_rtx (out));
23110
23111 return true;
23112 }
23113
23114 /*
23115 * General case: Jumpful:
23116 * xorl dest,dest cmpl op1, op2
23117 * cmpl op1, op2 movl ct, dest
23118 * setcc dest jcc 1f
23119 * decl dest movl cf, dest
23120 * andl (cf-ct),dest 1:
23121 * addl ct,dest
23122 *
23123 * Size 20. Size 14.
23124 *
23125 * This is reasonably steep, but branch mispredict costs are
23126 * high on modern cpus, so consider failing only if optimizing
23127 * for space.
23128 */
23129
23130 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23131 && BRANCH_COST (optimize_insn_for_speed_p (),
23132 false) >= 2)
23133 {
23134 if (cf == 0)
23135 {
23136 machine_mode cmp_mode = GET_MODE (op0);
23137 enum rtx_code new_code;
23138
23139 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23140 {
23141 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23142
23143 /* We may be reversing an unordered compare to a normal compare,
23144 which is not valid in general (we may convert a non-trapping
23145 condition to a trapping one); however, on i386 we currently
23146 emit all comparisons unordered. */
23147 new_code = reverse_condition_maybe_unordered (code);
23148 }
23149 else
23150 {
23151 new_code = ix86_reverse_condition (code, cmp_mode);
23152 if (compare_code != UNKNOWN && new_code != UNKNOWN)
23153 compare_code = reverse_condition (compare_code);
23154 }
23155
23156 if (new_code != UNKNOWN)
23157 {
23158 cf = ct;
23159 ct = 0;
23160 code = new_code;
23161 }
23162 }
23163
23164 if (compare_code != UNKNOWN)
23165 {
23166 /* notl op1 (if needed)
23167 sarl $31, op1
23168 andl (cf-ct), op1
23169 addl ct, op1
23170
23171 For x < 0 (resp. x <= -1) there will be no notl,
23172 so if possible swap the constants to get rid of the
23173 complement.
23174 True/false will be -1/0 while code below (store flag
23175 followed by decrement) is 0/-1, so the constants need
23176 to be exchanged once more. */
23177
23178 if (compare_code == GE || !cf)
23179 {
23180 code = reverse_condition (code);
23181 compare_code = LT;
23182 }
23183 else
23184 std::swap (ct, cf);
23185
23186 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23187 }
23188 else
23189 {
23190 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23191
23192 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
23193 constm1_rtx,
23194 copy_rtx (out), 1, OPTAB_DIRECT);
23195 }
23196
23197 out = expand_simple_binop (mode, AND, copy_rtx (out),
23198 gen_int_mode (cf - ct, mode),
23199 copy_rtx (out), 1, OPTAB_DIRECT);
23200 if (ct)
23201 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
23202 copy_rtx (out), 1, OPTAB_DIRECT);
23203 if (!rtx_equal_p (out, operands[0]))
23204 emit_move_insn (operands[0], copy_rtx (out));
23205
23206 return true;
23207 }
23208 }
23209
23210 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23211 {
23212 /* Try a few things more with specific constants and a variable. */
23213
23214 optab op;
23215 rtx var, orig_out, out, tmp;
23216
23217 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
23218 return false;
23219
23220 /* If one of the two operands is an interesting constant, load a
23221 constant with the above and mask it in with a logical operation. */
23222
23223 if (CONST_INT_P (operands[2]))
23224 {
23225 var = operands[3];
23226 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
23227 operands[3] = constm1_rtx, op = and_optab;
23228 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
23229 operands[3] = const0_rtx, op = ior_optab;
23230 else
23231 return false;
23232 }
23233 else if (CONST_INT_P (operands[3]))
23234 {
23235 var = operands[2];
23236 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
23237 operands[2] = constm1_rtx, op = and_optab;
23238 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
23239 operands[2] = const0_rtx, op = ior_optab;
23240 else
23241 return false;
23242 }
23243 else
23244 return false;
23245
23246 orig_out = operands[0];
23247 tmp = gen_reg_rtx (mode);
23248 operands[0] = tmp;
23249
23250 /* Recurse to get the constant loaded. */
23251 if (!ix86_expand_int_movcc (operands))
23252 return false;
23253
23254 /* Mask in the interesting variable. */
23255 out = expand_binop (mode, op, var, tmp, orig_out, 0,
23256 OPTAB_WIDEN);
23257 if (!rtx_equal_p (out, orig_out))
23258 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
23259
23260 return true;
23261 }
23262
23263 /*
23264 * For comparison with above,
23265 *
23266 * movl cf,dest
23267 * movl ct,tmp
23268 * cmpl op1,op2
23269 * cmovcc tmp,dest
23270 *
23271 * Size 15.
23272 */
23273
23274 if (! nonimmediate_operand (operands[2], mode))
23275 operands[2] = force_reg (mode, operands[2]);
23276 if (! nonimmediate_operand (operands[3], mode))
23277 operands[3] = force_reg (mode, operands[3]);
23278
23279 if (! register_operand (operands[2], VOIDmode)
23280 && (mode == QImode
23281 || ! register_operand (operands[3], VOIDmode)))
23282 operands[2] = force_reg (mode, operands[2]);
23283
23284 if (mode == QImode
23285 && ! register_operand (operands[3], VOIDmode))
23286 operands[3] = force_reg (mode, operands[3]);
23287
23288 emit_insn (compare_seq);
23289 emit_insn (gen_rtx_SET (operands[0],
23290 gen_rtx_IF_THEN_ELSE (mode,
23291 compare_op, operands[2],
23292 operands[3])));
23293 return true;
23294 }
23295
23296 /* Swap, force into registers, or otherwise massage the two operands
23297 to an sse comparison with a mask result. Thus we differ a bit from
23298 ix86_prepare_fp_compare_args which expects to produce a flags result.
23299
23300 The DEST operand exists to help determine whether to commute commutative
23301 operators. The POP0/POP1 operands are updated in place. The new
23302 comparison code is returned, or UNKNOWN if not implementable. */
23303
23304 static enum rtx_code
23305 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
23306 rtx *pop0, rtx *pop1)
23307 {
23308 switch (code)
23309 {
23310 case LTGT:
23311 case UNEQ:
23312 /* AVX supports all the needed comparisons. */
23313 if (TARGET_AVX)
23314 break;
23315 /* We have no LTGT as an operator. We could implement it with
23316 NE & ORDERED, but this requires an extra temporary. It's
23317 not clear that it's worth it. */
23318 return UNKNOWN;
23319
23320 case LT:
23321 case LE:
23322 case UNGT:
23323 case UNGE:
23324 /* These are supported directly. */
23325 break;
23326
23327 case EQ:
23328 case NE:
23329 case UNORDERED:
23330 case ORDERED:
23331 /* AVX has 3 operand comparisons, no need to swap anything. */
23332 if (TARGET_AVX)
23333 break;
23334 /* For commutative operators, try to canonicalize the destination
23335 operand to be first in the comparison - this helps reload to
23336 avoid extra moves. */
23337 if (!dest || !rtx_equal_p (dest, *pop1))
23338 break;
23339 /* FALLTHRU */
23340
23341 case GE:
23342 case GT:
23343 case UNLE:
23344 case UNLT:
23345 /* These are not supported directly before AVX, and furthermore
23346 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
23347 comparison operands to transform into something that is
23348 supported. */
23349 std::swap (*pop0, *pop1);
23350 code = swap_condition (code);
23351 break;
23352
23353 default:
23354 gcc_unreachable ();
23355 }
23356
23357 return code;
23358 }
23359
23360 /* Detect conditional moves that exactly match min/max operational
23361 semantics. Note that this is IEEE safe, as long as we don't
23362 interchange the operands.
23363
23364 Returns FALSE if this conditional move doesn't match a MIN/MAX,
23365 and TRUE if the operation is successful and instructions are emitted. */
23366
23367 static bool
23368 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
23369 rtx cmp_op1, rtx if_true, rtx if_false)
23370 {
23371 machine_mode mode;
23372 bool is_min;
23373 rtx tmp;
23374
23375 if (code == LT)
23376 ;
23377 else if (code == UNGE)
23378 std::swap (if_true, if_false);
23379 else
23380 return false;
23381
23382 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
23383 is_min = true;
23384 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
23385 is_min = false;
23386 else
23387 return false;
23388
23389 mode = GET_MODE (dest);
23390
23391 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
23392 but MODE may be a vector mode and thus not appropriate. */
23393 if (!flag_finite_math_only || flag_signed_zeros)
23394 {
23395 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
23396 rtvec v;
23397
23398 if_true = force_reg (mode, if_true);
23399 v = gen_rtvec (2, if_true, if_false);
23400 tmp = gen_rtx_UNSPEC (mode, v, u);
23401 }
23402 else
23403 {
23404 code = is_min ? SMIN : SMAX;
23405 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
23406 }
23407
23408 emit_insn (gen_rtx_SET (dest, tmp));
23409 return true;
23410 }
23411
23412 /* Expand an sse vector comparison. Return the register with the result. */
23413
23414 static rtx
23415 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
23416 rtx op_true, rtx op_false)
23417 {
23418 machine_mode mode = GET_MODE (dest);
23419 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
23420
23421 /* In the general case the result of the comparison can have a different mode from the operands. */
23422 machine_mode cmp_mode;
23423
23424 /* In AVX512F the result of comparison is an integer mask. */
23425 bool maskcmp = false;
23426 rtx x;
23427
23428 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
23429 {
23430 unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
23431 cmp_mode = int_mode_for_size (nbits, 0).require ();
23432 maskcmp = true;
23433 }
23434 else
23435 cmp_mode = cmp_ops_mode;
23436
23437
23438 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
23439 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
23440 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
23441
23442 if (optimize
23443 || (maskcmp && cmp_mode != mode)
23444 || (op_true && reg_overlap_mentioned_p (dest, op_true))
23445 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
23446 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
23447
23448 /* Compare patterns for int modes are unspec in AVX512F only. */
23449 if (maskcmp && (code == GT || code == EQ))
23450 {
23451 rtx (*gen)(rtx, rtx, rtx);
23452
23453 switch (cmp_ops_mode)
23454 {
23455 case E_V64QImode:
23456 gcc_assert (TARGET_AVX512BW);
23457 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
23458 break;
23459 case E_V32HImode:
23460 gcc_assert (TARGET_AVX512BW);
23461 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
23462 break;
23463 case E_V16SImode:
23464 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
23465 break;
23466 case E_V8DImode:
23467 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
23468 break;
23469 default:
23470 gen = NULL;
23471 }
23472
23473 if (gen)
23474 {
23475 emit_insn (gen (dest, cmp_op0, cmp_op1));
23476 return dest;
23477 }
23478 }
23479 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
23480
23481 if (cmp_mode != mode && !maskcmp)
23482 {
23483 x = force_reg (cmp_ops_mode, x);
23484 convert_move (dest, x, false);
23485 }
23486 else
23487 emit_insn (gen_rtx_SET (dest, x));
23488
23489 return dest;
23490 }
23491
23492 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
23493 operations. This is used for both scalar and vector conditional moves. */
23494
23495 void
23496 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
23497 {
23498 machine_mode mode = GET_MODE (dest);
23499 machine_mode cmpmode = GET_MODE (cmp);
23500
23501 /* In AVX512F the result of comparison is an integer mask. */
23502 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
23503
23504 rtx t2, t3, x;
23505
23506 /* If we have an integer mask and FP value then we need
23507 to cast mask to FP mode. */
23508 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
23509 {
23510 cmp = force_reg (cmpmode, cmp);
23511 cmp = gen_rtx_SUBREG (mode, cmp, 0);
23512 }
23513
23514 if (vector_all_ones_operand (op_true, mode)
23515 && rtx_equal_p (op_false, CONST0_RTX (mode))
23516 && !maskcmp)
23517 {
23518 emit_insn (gen_rtx_SET (dest, cmp));
23519 }
23520 else if (op_false == CONST0_RTX (mode)
23521 && !maskcmp)
23522 {
23523 op_true = force_reg (mode, op_true);
23524 x = gen_rtx_AND (mode, cmp, op_true);
23525 emit_insn (gen_rtx_SET (dest, x));
23526 }
23527 else if (op_true == CONST0_RTX (mode)
23528 && !maskcmp)
23529 {
23530 op_false = force_reg (mode, op_false);
23531 x = gen_rtx_NOT (mode, cmp);
23532 x = gen_rtx_AND (mode, x, op_false);
23533 emit_insn (gen_rtx_SET (dest, x));
23534 }
23535 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
23536 && !maskcmp)
23537 {
23538 op_false = force_reg (mode, op_false);
23539 x = gen_rtx_IOR (mode, cmp, op_false);
23540 emit_insn (gen_rtx_SET (dest, x));
23541 }
23542 else if (TARGET_XOP
23543 && !maskcmp)
23544 {
23545 op_true = force_reg (mode, op_true);
23546
23547 if (!nonimmediate_operand (op_false, mode))
23548 op_false = force_reg (mode, op_false);
23549
23550 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
23551 op_true,
23552 op_false)));
23553 }
23554 else
23555 {
23556 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23557 rtx d = dest;
23558
23559 if (!nonimmediate_operand (op_true, mode))
23560 op_true = force_reg (mode, op_true);
23561
23562 op_false = force_reg (mode, op_false);
23563
23564 switch (mode)
23565 {
23566 case E_V4SFmode:
23567 if (TARGET_SSE4_1)
23568 gen = gen_sse4_1_blendvps;
23569 break;
23570 case E_V2DFmode:
23571 if (TARGET_SSE4_1)
23572 gen = gen_sse4_1_blendvpd;
23573 break;
23574 case E_V16QImode:
23575 case E_V8HImode:
23576 case E_V4SImode:
23577 case E_V2DImode:
23578 if (TARGET_SSE4_1)
23579 {
23580 gen = gen_sse4_1_pblendvb;
23581 if (mode != V16QImode)
23582 d = gen_reg_rtx (V16QImode);
23583 op_false = gen_lowpart (V16QImode, op_false);
23584 op_true = gen_lowpart (V16QImode, op_true);
23585 cmp = gen_lowpart (V16QImode, cmp);
23586 }
23587 break;
23588 case E_V8SFmode:
23589 if (TARGET_AVX)
23590 gen = gen_avx_blendvps256;
23591 break;
23592 case E_V4DFmode:
23593 if (TARGET_AVX)
23594 gen = gen_avx_blendvpd256;
23595 break;
23596 case E_V32QImode:
23597 case E_V16HImode:
23598 case E_V8SImode:
23599 case E_V4DImode:
23600 if (TARGET_AVX2)
23601 {
23602 gen = gen_avx2_pblendvb;
23603 if (mode != V32QImode)
23604 d = gen_reg_rtx (V32QImode);
23605 op_false = gen_lowpart (V32QImode, op_false);
23606 op_true = gen_lowpart (V32QImode, op_true);
23607 cmp = gen_lowpart (V32QImode, cmp);
23608 }
23609 break;
23610
23611 case E_V64QImode:
23612 gen = gen_avx512bw_blendmv64qi;
23613 break;
23614 case E_V32HImode:
23615 gen = gen_avx512bw_blendmv32hi;
23616 break;
23617 case E_V16SImode:
23618 gen = gen_avx512f_blendmv16si;
23619 break;
23620 case E_V8DImode:
23621 gen = gen_avx512f_blendmv8di;
23622 break;
23623 case E_V8DFmode:
23624 gen = gen_avx512f_blendmv8df;
23625 break;
23626 case E_V16SFmode:
23627 gen = gen_avx512f_blendmv16sf;
23628 break;
23629
23630 default:
23631 break;
23632 }
23633
23634 if (gen != NULL)
23635 {
23636 emit_insn (gen (d, op_false, op_true, cmp));
23637 if (d != dest)
23638 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
23639 }
23640 else
23641 {
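	  /* No suitable blend instruction is available, so synthesize the
	     select as dest = (cmp & op_true) | (~cmp & op_false).  */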
23642 op_true = force_reg (mode, op_true);
23643
23644 t2 = gen_reg_rtx (mode);
23645 if (optimize)
23646 t3 = gen_reg_rtx (mode);
23647 else
23648 t3 = dest;
23649
23650 x = gen_rtx_AND (mode, op_true, cmp);
23651 emit_insn (gen_rtx_SET (t2, x));
23652
23653 x = gen_rtx_NOT (mode, cmp);
23654 x = gen_rtx_AND (mode, x, op_false);
23655 emit_insn (gen_rtx_SET (t3, x));
23656
23657 x = gen_rtx_IOR (mode, t3, t2);
23658 emit_insn (gen_rtx_SET (dest, x));
23659 }
23660 }
23661 }
23662
23663 /* Expand a floating-point conditional move. Return true if successful. */
23664
23665 bool
23666 ix86_expand_fp_movcc (rtx operands[])
23667 {
23668 machine_mode mode = GET_MODE (operands[0]);
23669 enum rtx_code code = GET_CODE (operands[1]);
23670 rtx tmp, compare_op;
23671 rtx op0 = XEXP (operands[1], 0);
23672 rtx op1 = XEXP (operands[1], 1);
23673
23674 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23675 {
23676 machine_mode cmode;
23677
23678 /* Since we've no cmove for sse registers, don't force bad register
23679 allocation just to gain access to it. Deny movcc when the
23680 comparison mode doesn't match the move mode. */
23681 cmode = GET_MODE (op0);
23682 if (cmode == VOIDmode)
23683 cmode = GET_MODE (op1);
23684 if (cmode != mode)
23685 return false;
23686
23687 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
23688 if (code == UNKNOWN)
23689 return false;
23690
23691 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
23692 operands[2], operands[3]))
23693 return true;
23694
23695 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
23696 operands[2], operands[3]);
23697 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
23698 return true;
23699 }
23700
23701 if (GET_MODE (op0) == TImode
23702 || (GET_MODE (op0) == DImode
23703 && !TARGET_64BIT))
23704 return false;
23705
23706 /* The floating point conditional move instructions don't directly
23707 support conditions resulting from a signed integer comparison. */
23708
23709 compare_op = ix86_expand_compare (code, op0, op1);
23710 if (!fcmov_comparison_operator (compare_op, VOIDmode))
23711 {
23712 tmp = gen_reg_rtx (QImode);
23713 ix86_expand_setcc (tmp, code, op0, op1);
23714
23715 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
23716 }
23717
23718 emit_insn (gen_rtx_SET (operands[0],
23719 gen_rtx_IF_THEN_ELSE (mode, compare_op,
23720 operands[2], operands[3])));
23721
23722 return true;
23723 }
23724
23725 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
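/* The returned value is the imm8 predicate operand of the AVX-512
   VPCMP/VPCMPU instructions: 0 = EQ, 1 = LT, 2 = LE, 4 = NE,
   5 = NLT (i.e. GE) and 6 = NLE (i.e. GT).  */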
23726
23727 static int
23728 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
23729 {
23730 switch (code)
23731 {
23732 case EQ:
23733 return 0;
23734 case LT:
23735 case LTU:
23736 return 1;
23737 case LE:
23738 case LEU:
23739 return 2;
23740 case NE:
23741 return 4;
23742 case GE:
23743 case GEU:
23744 return 5;
23745 case GT:
23746 case GTU:
23747 return 6;
23748 default:
23749 gcc_unreachable ();
23750 }
23751 }
23752
23753 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
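/* The returned value is the imm8 comparison predicate of VCMPPS/VCMPPD:
   for example 0x00 is EQ_OQ, 0x01 LT_OS, 0x0d GE_OS, 0x0e GT_OS,
   0x03 UNORD_Q and 0x07 ORD_Q; the UN* codes use the corresponding
   negated predicates, e.g. 0x09 (NGE_US) for UNLT.  */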
23754
23755 static int
23756 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
23757 {
23758 switch (code)
23759 {
23760 case EQ:
23761 return 0x00;
23762 case NE:
23763 return 0x04;
23764 case GT:
23765 return 0x0e;
23766 case LE:
23767 return 0x02;
23768 case GE:
23769 return 0x0d;
23770 case LT:
23771 return 0x01;
23772 case UNLE:
23773 return 0x0a;
23774 case UNLT:
23775 return 0x09;
23776 case UNGE:
23777 return 0x05;
23778 case UNGT:
23779 return 0x06;
23780 case UNEQ:
23781 return 0x18;
23782 case LTGT:
23783 return 0x0c;
23784 case ORDERED:
23785 return 0x07;
23786 case UNORDERED:
23787 return 0x03;
23788 default:
23789 gcc_unreachable ();
23790 }
23791 }
23792
23793 /* Return immediate value to be used in UNSPEC_PCMP
23794 for comparison CODE in MODE. */
23795
23796 static int
23797 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
23798 {
23799 if (FLOAT_MODE_P (mode))
23800 return ix86_fp_cmp_code_to_pcmp_immediate (code);
23801 return ix86_int_cmp_code_to_pcmp_immediate (code);
23802 }
23803
23804 /* Expand AVX-512 vector comparison. */
23805
23806 bool
23807 ix86_expand_mask_vec_cmp (rtx operands[])
23808 {
23809 machine_mode mask_mode = GET_MODE (operands[0]);
23810 machine_mode cmp_mode = GET_MODE (operands[2]);
23811 enum rtx_code code = GET_CODE (operands[1]);
23812 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
23813 int unspec_code;
23814 rtx unspec;
23815
23816 switch (code)
23817 {
23818 case LEU:
23819 case GTU:
23820 case GEU:
23821 case LTU:
23822 unspec_code = UNSPEC_UNSIGNED_PCMP;
23823 break;
23824
23825 default:
23826 unspec_code = UNSPEC_PCMP;
23827 }
23828
23829 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23830 operands[3], imm),
23831 unspec_code);
23832 emit_insn (gen_rtx_SET (operands[0], unspec));
23833
23834 return true;
23835 }
23836
23837 /* Expand fp vector comparison. */
23838
23839 bool
23840 ix86_expand_fp_vec_cmp (rtx operands[])
23841 {
23842 enum rtx_code code = GET_CODE (operands[1]);
23843 rtx cmp;
23844
23845 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23846 &operands[2], &operands[3]);
23847 if (code == UNKNOWN)
23848 {
23849 rtx temp;
23850 switch (GET_CODE (operands[1]))
23851 {
23852 case LTGT:
23853 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23854 operands[3], NULL, NULL);
23855 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23856 operands[3], NULL, NULL);
23857 code = AND;
23858 break;
23859 case UNEQ:
23860 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23861 operands[3], NULL, NULL);
23862 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23863 operands[3], NULL, NULL);
23864 code = IOR;
23865 break;
23866 default:
23867 gcc_unreachable ();
23868 }
23869 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23870 OPTAB_DIRECT);
23871 }
23872 else
23873 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23874 operands[1], operands[2]);
23875
23876 if (operands[0] != cmp)
23877 emit_move_insn (operands[0], cmp);
23878
23879 return true;
23880 }
23881
23882 static rtx
23883 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23884 rtx op_true, rtx op_false, bool *negate)
23885 {
23886 machine_mode data_mode = GET_MODE (dest);
23887 machine_mode mode = GET_MODE (cop0);
23888 rtx x;
23889
23890 *negate = false;
23891
23892 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23893 if (TARGET_XOP
23894 && (mode == V16QImode || mode == V8HImode
23895 || mode == V4SImode || mode == V2DImode))
23896 ;
23897 else
23898 {
23899 /* Canonicalize the comparison to EQ, GT, GTU. */
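      /* Every other code is expressed in terms of these three: NE, LE and
	 LEU become the negation of EQ, GT and GTU; GE and GEU are negated
	 into LT and LTU, which are then turned into GT and GTU by swapping
	 the operands.  *NEGATE records whether the caller must invert the
	 resulting mask.  */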
23900 switch (code)
23901 {
23902 case EQ:
23903 case GT:
23904 case GTU:
23905 break;
23906
23907 case NE:
23908 case LE:
23909 case LEU:
23910 code = reverse_condition (code);
23911 *negate = true;
23912 break;
23913
23914 case GE:
23915 case GEU:
23916 code = reverse_condition (code);
23917 *negate = true;
23918 /* FALLTHRU */
23919
23920 case LT:
23921 case LTU:
23922 std::swap (cop0, cop1);
23923 code = swap_condition (code);
23924 break;
23925
23926 default:
23927 gcc_unreachable ();
23928 }
23929
23930 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23931 if (mode == V2DImode)
23932 {
23933 switch (code)
23934 {
23935 case EQ:
23936 /* SSE4.1 supports EQ. */
23937 if (!TARGET_SSE4_1)
23938 return NULL;
23939 break;
23940
23941 case GT:
23942 case GTU:
23943 /* SSE4.2 supports GT/GTU. */
23944 if (!TARGET_SSE4_2)
23945 return NULL;
23946 break;
23947
23948 default:
23949 gcc_unreachable ();
23950 }
23951 }
23952
23953 /* Unsigned parallel compare is not supported by the hardware.
23954 Play some tricks to turn this into a signed comparison
23955 against 0. */
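/* For the SImode/DImode element cases below this is done by subtracting
   the sign-bit constant (e.g. 0x80000000 for V4SImode) from both operands,
   which maps unsigned order onto signed order, so x GTU y becomes
   (x - bias) GT (y - bias). */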
23956 if (code == GTU)
23957 {
23958 cop0 = force_reg (mode, cop0);
23959
23960 switch (mode)
23961 {
23962 case E_V16SImode:
23963 case E_V8DImode:
23964 case E_V8SImode:
23965 case E_V4DImode:
23966 case E_V4SImode:
23967 case E_V2DImode:
23968 {
23969 rtx t1, t2, mask;
23970 rtx (*gen_sub3) (rtx, rtx, rtx);
23971
23972 switch (mode)
23973 {
23974 case E_V16SImode: gen_sub3 = gen_subv16si3; break;
23975 case E_V8DImode: gen_sub3 = gen_subv8di3; break;
23976 case E_V8SImode: gen_sub3 = gen_subv8si3; break;
23977 case E_V4DImode: gen_sub3 = gen_subv4di3; break;
23978 case E_V4SImode: gen_sub3 = gen_subv4si3; break;
23979 case E_V2DImode: gen_sub3 = gen_subv2di3; break;
23980 default:
23981 gcc_unreachable ();
23982 }
23983 /* Subtract (-(INT MAX) - 1) from both operands to make
23984 them signed. */
23985 mask = ix86_build_signbit_mask (mode, true, false);
23986 t1 = gen_reg_rtx (mode);
23987 emit_insn (gen_sub3 (t1, cop0, mask));
23988
23989 t2 = gen_reg_rtx (mode);
23990 emit_insn (gen_sub3 (t2, cop1, mask));
23991
23992 cop0 = t1;
23993 cop1 = t2;
23994 code = GT;
23995 }
23996 break;
23997
23998 case E_V64QImode:
23999 case E_V32HImode:
24000 case E_V32QImode:
24001 case E_V16HImode:
24002 case E_V16QImode:
24003 case E_V8HImode:
24004 /* Perform a parallel unsigned saturating subtraction. */
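/* cop0 GTU cop1 holds exactly when the saturating difference
   cop0 -us cop1 is nonzero, so turn this into an EQ test against
   zero and flip NEGATE. */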
24005 x = gen_reg_rtx (mode);
24006 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
24007 cop1)));
24008
24009 cop0 = x;
24010 cop1 = CONST0_RTX (mode);
24011 code = EQ;
24012 *negate = !*negate;
24013 break;
24014
24015 default:
24016 gcc_unreachable ();
24017 }
24018 }
24019 }
24020
24021 if (*negate)
24022 std::swap (op_true, op_false);
24023
24024 /* Allow the comparison to be done in one mode, but the movcc to
24025 happen in another mode. */
24026 if (data_mode == mode)
24027 {
24028 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
24029 op_true, op_false);
24030 }
24031 else
24032 {
24033 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
24034 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
24035 op_true, op_false);
24036 if (GET_MODE (x) == mode)
24037 x = gen_lowpart (data_mode, x);
24038 }
24039
24040 return x;
24041 }
24042
24043 /* Expand integer vector comparison. */
24044
24045 bool
24046 ix86_expand_int_vec_cmp (rtx operands[])
24047 {
24048 rtx_code code = GET_CODE (operands[1]);
24049 bool negate = false;
24050 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
24051 operands[3], NULL, NULL, &negate);
24052
24053 if (!cmp)
24054 return false;
24055
24056 if (negate)
24057 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
24058 CONST0_RTX (GET_MODE (cmp)),
24059 NULL, NULL, &negate);
24060
24061 gcc_assert (!negate);
24062
24063 if (operands[0] != cmp)
24064 emit_move_insn (operands[0], cmp);
24065
24066 return true;
24067 }
24068
24069 /* Expand a floating-point vector conditional move; a vcond operation
24070 rather than a movcc operation. */
24071
24072 bool
24073 ix86_expand_fp_vcond (rtx operands[])
24074 {
24075 enum rtx_code code = GET_CODE (operands[3]);
24076 rtx cmp;
24077
24078 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24079 &operands[4], &operands[5]);
24080 if (code == UNKNOWN)
24081 {
24082 rtx temp;
24083 switch (GET_CODE (operands[3]))
24084 {
24085 case LTGT:
24086 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
24087 operands[5], operands[0], operands[0]);
24088 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
24089 operands[5], operands[1], operands[2]);
24090 code = AND;
24091 break;
24092 case UNEQ:
24093 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
24094 operands[5], operands[0], operands[0]);
24095 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
24096 operands[5], operands[1], operands[2]);
24097 code = IOR;
24098 break;
24099 default:
24100 gcc_unreachable ();
24101 }
24102 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24103 OPTAB_DIRECT);
24104 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24105 return true;
24106 }
24107
24108 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
24109 operands[5], operands[1], operands[2]))
24110 return true;
24111
24112 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
24113 operands[1], operands[2]);
24114 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24115 return true;
24116 }
24117
24118 /* Expand a signed/unsigned integral vector conditional move. */
24119
24120 bool
24121 ix86_expand_int_vcond (rtx operands[])
24122 {
24123 machine_mode data_mode = GET_MODE (operands[0]);
24124 machine_mode mode = GET_MODE (operands[4]);
24125 enum rtx_code code = GET_CODE (operands[3]);
24126 bool negate = false;
24127 rtx x, cop0, cop1;
24128
24129 cop0 = operands[4];
24130 cop1 = operands[5];
24131
24132 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
24133 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
24134 if ((code == LT || code == GE)
24135 && data_mode == mode
24136 && cop1 == CONST0_RTX (mode)
24137 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
24138 && GET_MODE_UNIT_SIZE (data_mode) > 1
24139 && GET_MODE_UNIT_SIZE (data_mode) <= 8
24140 && (GET_MODE_SIZE (data_mode) == 16
24141 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
24142 {
24143 rtx negop = operands[2 - (code == LT)];
24144 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
24145 if (negop == CONST1_RTX (data_mode))
24146 {
24147 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
24148 operands[0], 1, OPTAB_DIRECT);
24149 if (res != operands[0])
24150 emit_move_insn (operands[0], res);
24151 return true;
24152 }
24153 else if (GET_MODE_INNER (data_mode) != DImode
24154 && vector_all_ones_operand (negop, data_mode))
24155 {
24156 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
24157 operands[0], 0, OPTAB_DIRECT);
24158 if (res != operands[0])
24159 emit_move_insn (operands[0], res);
24160 return true;
24161 }
24162 }
24163
24164 if (!nonimmediate_operand (cop1, mode))
24165 cop1 = force_reg (mode, cop1);
24166 if (!general_operand (operands[1], data_mode))
24167 operands[1] = force_reg (data_mode, operands[1]);
24168 if (!general_operand (operands[2], data_mode))
24169 operands[2] = force_reg (data_mode, operands[2]);
24170
24171 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
24172 operands[1], operands[2], &negate);
24173
24174 if (!x)
24175 return false;
24176
24177 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
24178 operands[2-negate]);
24179 return true;
24180 }
24181
24182 /* AVX512F does support 64-byte integer vector operations,
24183 thus the longest vector we are faced with is V64QImode. */
24184 #define MAX_VECT_LEN 64
24185
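/* Describes a vector permutation to expand (or, with TESTING_P, merely to
   check for expandability): TARGET receives OP0/OP1 permuted according to
   the NELT indices in PERM, where values in [NELT, 2*NELT) select elements
   from OP1; VMODE is the vector mode and ONE_OPERAND_P is set when both
   inputs are the same. */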
24186 struct expand_vec_perm_d
24187 {
24188 rtx target, op0, op1;
24189 unsigned char perm[MAX_VECT_LEN];
24190 machine_mode vmode;
24191 unsigned char nelt;
24192 bool one_operand_p;
24193 bool testing_p;
24194 };
24195
24196 static bool
24197 ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
24198 struct expand_vec_perm_d *d)
24199 {
24200 /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
24201 expanders, so args are either in d, or in op0, op1, etc. */
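/* MASKMODE differs from MODE only for floating-point element modes,
   where the index (mask) vector uses the corresponding integer vector
   mode. */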
24202 machine_mode mode = GET_MODE (d ? d->op0 : op0);
24203 machine_mode maskmode = mode;
24204 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24205
24206 switch (mode)
24207 {
24208 case E_V8HImode:
24209 if (TARGET_AVX512VL && TARGET_AVX512BW)
24210 gen = gen_avx512vl_vpermt2varv8hi3;
24211 break;
24212 case E_V16HImode:
24213 if (TARGET_AVX512VL && TARGET_AVX512BW)
24214 gen = gen_avx512vl_vpermt2varv16hi3;
24215 break;
24216 case E_V64QImode:
24217 if (TARGET_AVX512VBMI)
24218 gen = gen_avx512bw_vpermt2varv64qi3;
24219 break;
24220 case E_V32HImode:
24221 if (TARGET_AVX512BW)
24222 gen = gen_avx512bw_vpermt2varv32hi3;
24223 break;
24224 case E_V4SImode:
24225 if (TARGET_AVX512VL)
24226 gen = gen_avx512vl_vpermt2varv4si3;
24227 break;
24228 case E_V8SImode:
24229 if (TARGET_AVX512VL)
24230 gen = gen_avx512vl_vpermt2varv8si3;
24231 break;
24232 case E_V16SImode:
24233 if (TARGET_AVX512F)
24234 gen = gen_avx512f_vpermt2varv16si3;
24235 break;
24236 case E_V4SFmode:
24237 if (TARGET_AVX512VL)
24238 {
24239 gen = gen_avx512vl_vpermt2varv4sf3;
24240 maskmode = V4SImode;
24241 }
24242 break;
24243 case E_V8SFmode:
24244 if (TARGET_AVX512VL)
24245 {
24246 gen = gen_avx512vl_vpermt2varv8sf3;
24247 maskmode = V8SImode;
24248 }
24249 break;
24250 case E_V16SFmode:
24251 if (TARGET_AVX512F)
24252 {
24253 gen = gen_avx512f_vpermt2varv16sf3;
24254 maskmode = V16SImode;
24255 }
24256 break;
24257 case E_V2DImode:
24258 if (TARGET_AVX512VL)
24259 gen = gen_avx512vl_vpermt2varv2di3;
24260 break;
24261 case E_V4DImode:
24262 if (TARGET_AVX512VL)
24263 gen = gen_avx512vl_vpermt2varv4di3;
24264 break;
24265 case E_V8DImode:
24266 if (TARGET_AVX512F)
24267 gen = gen_avx512f_vpermt2varv8di3;
24268 break;
24269 case E_V2DFmode:
24270 if (TARGET_AVX512VL)
24271 {
24272 gen = gen_avx512vl_vpermt2varv2df3;
24273 maskmode = V2DImode;
24274 }
24275 break;
24276 case E_V4DFmode:
24277 if (TARGET_AVX512VL)
24278 {
24279 gen = gen_avx512vl_vpermt2varv4df3;
24280 maskmode = V4DImode;
24281 }
24282 break;
24283 case E_V8DFmode:
24284 if (TARGET_AVX512F)
24285 {
24286 gen = gen_avx512f_vpermt2varv8df3;
24287 maskmode = V8DImode;
24288 }
24289 break;
24290 default:
24291 break;
24292 }
24293
24294 if (gen == NULL)
24295 return false;
24296
24297 /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
24298 expanders, so args are either in d, or in op0, op1, etc. */
24299 if (d)
24300 {
24301 rtx vec[64];
24302 target = d->target;
24303 op0 = d->op0;
24304 op1 = d->op1;
24305 for (int i = 0; i < d->nelt; ++i)
24306 vec[i] = GEN_INT (d->perm[i]);
24307 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
24308 }
24309
24310 emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
24311 return true;
24312 }
24313
24314 /* Expand a variable vector permutation. */
24315
24316 void
24317 ix86_expand_vec_perm (rtx operands[])
24318 {
24319 rtx target = operands[0];
24320 rtx op0 = operands[1];
24321 rtx op1 = operands[2];
24322 rtx mask = operands[3];
24323 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
24324 machine_mode mode = GET_MODE (op0);
24325 machine_mode maskmode = GET_MODE (mask);
24326 int w, e, i;
24327 bool one_operand_shuffle = rtx_equal_p (op0, op1);
24328
24329 /* Number of elements in the vector. */
24330 w = GET_MODE_NUNITS (mode);
24331 e = GET_MODE_UNIT_SIZE (mode);
24332 gcc_assert (w <= 64);
24333
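/* Strategy: first try single-instruction variable permutes (AVX-512
   vpermvar, then vpermt2var); failing that, use the AVX2 lane-crossing
   emulations below; otherwise fall back to the SSSE3/XOP byte-shuffle
   path, which operates on V16QImode. */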
24334 if (TARGET_AVX512F && one_operand_shuffle)
24335 {
24336 rtx (*gen) (rtx, rtx, rtx) = NULL;
24337 switch (mode)
24338 {
24339 case E_V16SImode:
24340 gen = gen_avx512f_permvarv16si;
24341 break;
24342 case E_V16SFmode:
24343 gen = gen_avx512f_permvarv16sf;
24344 break;
24345 case E_V8DImode:
24346 gen = gen_avx512f_permvarv8di;
24347 break;
24348 case E_V8DFmode:
24349 gen = gen_avx512f_permvarv8df;
24350 break;
24351 default:
24352 break;
24353 }
24354 if (gen != NULL)
24355 {
24356 emit_insn (gen (target, op0, mask));
24357 return;
24358 }
24359 }
24360
24361 if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
24362 return;
24363
24364 if (TARGET_AVX2)
24365 {
24366 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
24367 {
24368 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
24369 a constant shuffle operand. With a tiny bit of effort we can
24370 use VPERMD instead. A re-interpretation stall for V4DFmode is
24371 unfortunate but there's no avoiding it.
24372 Similarly for V16HImode we don't have instructions for variable
24373 shuffling, while for V32QImode we can, after preparing suitable
24374 masks, use vpshufb; vpshufb; vpermq; vpor. */
24375
24376 if (mode == V16HImode)
24377 {
24378 maskmode = mode = V32QImode;
24379 w = 32;
24380 e = 1;
24381 }
24382 else
24383 {
24384 maskmode = mode = V8SImode;
24385 w = 8;
24386 e = 4;
24387 }
24388 t1 = gen_reg_rtx (maskmode);
24389
24390 /* Replicate the low bits of the V4DImode mask into V8SImode:
24391 mask = { A B C D }
24392 t1 = { A A B B C C D D }. */
24393 for (i = 0; i < w / 2; ++i)
24394 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
24395 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24396 vt = force_reg (maskmode, vt);
24397 mask = gen_lowpart (maskmode, mask);
24398 if (maskmode == V8SImode)
24399 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
24400 else
24401 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
24402
24403 /* Multiply the shuffle indices by two. */
24404 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
24405 OPTAB_DIRECT);
24406
24407 /* Add one to the odd shuffle indices:
24408 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
24409 for (i = 0; i < w / 2; ++i)
24410 {
24411 vec[i * 2] = const0_rtx;
24412 vec[i * 2 + 1] = const1_rtx;
24413 }
24414 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24415 vt = validize_mem (force_const_mem (maskmode, vt));
24416 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
24417 OPTAB_DIRECT);
24418
24419 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
24420 operands[3] = mask = t1;
24421 target = gen_reg_rtx (mode);
24422 op0 = gen_lowpart (mode, op0);
24423 op1 = gen_lowpart (mode, op1);
24424 }
24425
24426 switch (mode)
24427 {
24428 case E_V8SImode:
24429 /* The VPERMD and VPERMPS instructions already properly ignore
24430 the high bits of the shuffle elements. No need for us to
24431 perform an AND ourselves. */
24432 if (one_operand_shuffle)
24433 {
24434 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
24435 if (target != operands[0])
24436 emit_move_insn (operands[0],
24437 gen_lowpart (GET_MODE (operands[0]), target));
24438 }
24439 else
24440 {
24441 t1 = gen_reg_rtx (V8SImode);
24442 t2 = gen_reg_rtx (V8SImode);
24443 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
24444 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
24445 goto merge_two;
24446 }
24447 return;
24448
24449 case E_V8SFmode:
24450 mask = gen_lowpart (V8SImode, mask);
24451 if (one_operand_shuffle)
24452 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
24453 else
24454 {
24455 t1 = gen_reg_rtx (V8SFmode);
24456 t2 = gen_reg_rtx (V8SFmode);
24457 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
24458 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
24459 goto merge_two;
24460 }
24461 return;
24462
24463 case E_V4SImode:
24464 /* By combining the two 128-bit input vectors into one 256-bit
24465 input vector, we can use VPERMD and VPERMPS for the full
24466 two-operand shuffle. */
24467 t1 = gen_reg_rtx (V8SImode);
24468 t2 = gen_reg_rtx (V8SImode);
24469 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
24470 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24471 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
24472 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
24473 return;
24474
24475 case E_V4SFmode:
24476 t1 = gen_reg_rtx (V8SFmode);
24477 t2 = gen_reg_rtx (V8SImode);
24478 mask = gen_lowpart (V4SImode, mask);
24479 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
24480 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24481 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
24482 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
24483 return;
24484
24485 case E_V32QImode:
24486 t1 = gen_reg_rtx (V32QImode);
24487 t2 = gen_reg_rtx (V32QImode);
24488 t3 = gen_reg_rtx (V32QImode);
24489 vt2 = GEN_INT (-128);
24490 vt = gen_const_vec_duplicate (V32QImode, vt2);
24491 vt = force_reg (V32QImode, vt);
24492 for (i = 0; i < 32; i++)
24493 vec[i] = i < 16 ? vt2 : const0_rtx;
24494 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
24495 vt2 = force_reg (V32QImode, vt2);
24496 /* From mask create two adjusted masks, which contain the same
24497 bits as mask in the low 7 bits of each vector element.
24498 The first mask will have the most significant bit clear
24499 if it requests an element from the same 128-bit lane
24500 and MSB set if it requests an element from the other 128-bit lane.
24501 The second mask will have the opposite values of the MSB,
24502 and additionally will have its 128-bit lanes swapped.
24503 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
24504 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
24505 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
24506 stands for other 12 bytes. */
24507 /* The bit that tells whether an element is from the same lane or the
24508 other lane is bit 4, so shift it up by 3 to the MSB position. */
24509 t5 = gen_reg_rtx (V4DImode);
24510 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
24511 GEN_INT (3)));
24512 /* Clear MSB bits from the mask just in case it had them set. */
24513 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
24514 /* After this t1 will have MSB set for elements from other lane. */
24515 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
24516 /* Clear bits other than MSB. */
24517 emit_insn (gen_andv32qi3 (t1, t1, vt));
24518 /* Or in the lower bits from mask into t3. */
24519 emit_insn (gen_iorv32qi3 (t3, t1, t2));
24520 /* And invert MSB bits in t1, so MSB is set for elements from the same
24521 lane. */
24522 emit_insn (gen_xorv32qi3 (t1, t1, vt));
24523 /* Swap 128-bit lanes in t3. */
24524 t6 = gen_reg_rtx (V4DImode);
24525 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
24526 const2_rtx, GEN_INT (3),
24527 const0_rtx, const1_rtx));
24528 /* And or in the lower bits from mask into t1. */
24529 emit_insn (gen_iorv32qi3 (t1, t1, t2));
24530 if (one_operand_shuffle)
24531 {
24532 /* Each of these shuffles will put 0s in places where
24533 element from the other 128-bit lane is needed, otherwise
24534 will shuffle in the requested value. */
24535 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
24536 gen_lowpart (V32QImode, t6)));
24537 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
24538 /* For t3 the 128-bit lanes are swapped again. */
24539 t7 = gen_reg_rtx (V4DImode);
24540 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
24541 const2_rtx, GEN_INT (3),
24542 const0_rtx, const1_rtx));
24543 /* And oring both together leads to the result. */
24544 emit_insn (gen_iorv32qi3 (target, t1,
24545 gen_lowpart (V32QImode, t7)));
24546 if (target != operands[0])
24547 emit_move_insn (operands[0],
24548 gen_lowpart (GET_MODE (operands[0]), target));
24549 return;
24550 }
24551
24552 t4 = gen_reg_rtx (V32QImode);
24553 /* Similarly to the above one_operand_shuffle code, just
24554 repeated twice for each operand. The merge_two: code
24555 will merge the two results together. */
24556 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
24557 gen_lowpart (V32QImode, t6)));
24558 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
24559 gen_lowpart (V32QImode, t6)));
24560 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
24561 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
24562 t7 = gen_reg_rtx (V4DImode);
24563 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
24564 const2_rtx, GEN_INT (3),
24565 const0_rtx, const1_rtx));
24566 t8 = gen_reg_rtx (V4DImode);
24567 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
24568 const2_rtx, GEN_INT (3),
24569 const0_rtx, const1_rtx));
24570 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
24571 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
24572 t1 = t4;
24573 t2 = t3;
24574 goto merge_two;
24575
24576 default:
24577 gcc_assert (GET_MODE_SIZE (mode) <= 16);
24578 break;
24579 }
24580 }
24581
24582 if (TARGET_XOP)
24583 {
24584 /* The XOP VPPERM insn supports three inputs. By ignoring the
24585 one_operand_shuffle special case, we avoid creating another
24586 set of constant vectors in memory. */
24587 one_operand_shuffle = false;
24588
24589 /* mask = mask & {2*w-1, ...} */
24590 vt = GEN_INT (2*w - 1);
24591 }
24592 else
24593 {
24594 /* mask = mask & {w-1, ...} */
24595 vt = GEN_INT (w - 1);
24596 }
24597
24598 vt = gen_const_vec_duplicate (maskmode, vt);
24599 mask = expand_simple_binop (maskmode, AND, mask, vt,
24600 NULL_RTX, 0, OPTAB_DIRECT);
24601
24602 /* For non-QImode operations, convert the word permutation control
24603 into a byte permutation control. */
24604 if (mode != V16QImode)
24605 {
24606 mask = expand_simple_binop (maskmode, ASHIFT, mask,
24607 GEN_INT (exact_log2 (e)),
24608 NULL_RTX, 0, OPTAB_DIRECT);
24609
24610 /* Convert mask to vector of chars. */
24611 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
24612
24613 /* Replicate each of the input bytes into byte positions:
24614 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
24615 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
24616 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
24617 for (i = 0; i < 16; ++i)
24618 vec[i] = GEN_INT (i/e * e);
24619 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24620 vt = validize_mem (force_const_mem (V16QImode, vt));
24621 if (TARGET_XOP)
24622 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
24623 else
24624 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
24625
24626 /* Convert it into the byte positions by doing
24627 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
24628 for (i = 0; i < 16; ++i)
24629 vec[i] = GEN_INT (i % e);
24630 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24631 vt = validize_mem (force_const_mem (V16QImode, vt));
24632 emit_insn (gen_addv16qi3 (mask, mask, vt));
24633 }
24634
24635 /* The actual shuffle operations all operate on V16QImode. */
24636 op0 = gen_lowpart (V16QImode, op0);
24637 op1 = gen_lowpart (V16QImode, op1);
24638
24639 if (TARGET_XOP)
24640 {
24641 if (GET_MODE (target) != V16QImode)
24642 target = gen_reg_rtx (V16QImode);
24643 emit_insn (gen_xop_pperm (target, op0, op1, mask));
24644 if (target != operands[0])
24645 emit_move_insn (operands[0],
24646 gen_lowpart (GET_MODE (operands[0]), target));
24647 }
24648 else if (one_operand_shuffle)
24649 {
24650 if (GET_MODE (target) != V16QImode)
24651 target = gen_reg_rtx (V16QImode);
24652 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
24653 if (target != operands[0])
24654 emit_move_insn (operands[0],
24655 gen_lowpart (GET_MODE (operands[0]), target));
24656 }
24657 else
24658 {
24659 rtx xops[6];
24660 bool ok;
24661
24662 /* Shuffle the two input vectors independently. */
24663 t1 = gen_reg_rtx (V16QImode);
24664 t2 = gen_reg_rtx (V16QImode);
24665 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
24666 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
24667
24668 merge_two:
24669 /* Then merge them together. The key is whether any given control
24670 element contained a bit set that indicates the second word. */
24671 mask = operands[3];
24672 vt = GEN_INT (w);
24673 if (maskmode == V2DImode && !TARGET_SSE4_1)
24674 {
24675 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
24676 more shuffle to convert the V2DI input mask into a V4SI
24677 input mask. At that point the masking performed via
24678 ix86_expand_int_vcond will work as desired. */
24679 rtx t3 = gen_reg_rtx (V4SImode);
24680 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
24681 const0_rtx, const0_rtx,
24682 const2_rtx, const2_rtx));
24683 mask = t3;
24684 maskmode = V4SImode;
24685 e = w = 4;
24686 }
24687
24688 vt = gen_const_vec_duplicate (maskmode, vt);
24689 vt = force_reg (maskmode, vt);
24690 mask = expand_simple_binop (maskmode, AND, mask, vt,
24691 NULL_RTX, 0, OPTAB_DIRECT);
24692
24693 if (GET_MODE (target) != mode)
24694 target = gen_reg_rtx (mode);
24695 xops[0] = target;
24696 xops[1] = gen_lowpart (mode, t2);
24697 xops[2] = gen_lowpart (mode, t1);
24698 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
24699 xops[4] = mask;
24700 xops[5] = vt;
24701 ok = ix86_expand_int_vcond (xops);
24702 gcc_assert (ok);
24703 if (target != operands[0])
24704 emit_move_insn (operands[0],
24705 gen_lowpart (GET_MODE (operands[0]), target));
24706 }
24707 }
24708
24709 /* Unpack SRC into DEST, the next wider integer vector type. UNSIGNED_P
24710 is true if we should do zero extension, else sign extension. HIGH_P is
24711 true if we want the N/2 high elements, else the low elements. */
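/* With SSE4.1 and above the extension patterns are applied to the requested
   half (extracted for 32/64-byte vectors, or shifted down by 64 bits for the
   high half of a 16-byte vector); without SSE4.1, SRC is interleaved with
   zero (unsigned) or with a sign mask obtained from a GT comparison against
   zero (signed). */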
24712
24713 void
24714 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
24715 {
24716 machine_mode imode = GET_MODE (src);
24717 rtx tmp;
24718
24719 if (TARGET_SSE4_1)
24720 {
24721 rtx (*unpack)(rtx, rtx);
24722 rtx (*extract)(rtx, rtx) = NULL;
24723 machine_mode halfmode = BLKmode;
24724
24725 switch (imode)
24726 {
24727 case E_V64QImode:
24728 if (unsigned_p)
24729 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
24730 else
24731 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
24732 halfmode = V32QImode;
24733 extract
24734 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
24735 break;
24736 case E_V32QImode:
24737 if (unsigned_p)
24738 unpack = gen_avx2_zero_extendv16qiv16hi2;
24739 else
24740 unpack = gen_avx2_sign_extendv16qiv16hi2;
24741 halfmode = V16QImode;
24742 extract
24743 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
24744 break;
24745 case E_V32HImode:
24746 if (unsigned_p)
24747 unpack = gen_avx512f_zero_extendv16hiv16si2;
24748 else
24749 unpack = gen_avx512f_sign_extendv16hiv16si2;
24750 halfmode = V16HImode;
24751 extract
24752 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
24753 break;
24754 case E_V16HImode:
24755 if (unsigned_p)
24756 unpack = gen_avx2_zero_extendv8hiv8si2;
24757 else
24758 unpack = gen_avx2_sign_extendv8hiv8si2;
24759 halfmode = V8HImode;
24760 extract
24761 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
24762 break;
24763 case E_V16SImode:
24764 if (unsigned_p)
24765 unpack = gen_avx512f_zero_extendv8siv8di2;
24766 else
24767 unpack = gen_avx512f_sign_extendv8siv8di2;
24768 halfmode = V8SImode;
24769 extract
24770 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
24771 break;
24772 case E_V8SImode:
24773 if (unsigned_p)
24774 unpack = gen_avx2_zero_extendv4siv4di2;
24775 else
24776 unpack = gen_avx2_sign_extendv4siv4di2;
24777 halfmode = V4SImode;
24778 extract
24779 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
24780 break;
24781 case E_V16QImode:
24782 if (unsigned_p)
24783 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
24784 else
24785 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
24786 break;
24787 case E_V8HImode:
24788 if (unsigned_p)
24789 unpack = gen_sse4_1_zero_extendv4hiv4si2;
24790 else
24791 unpack = gen_sse4_1_sign_extendv4hiv4si2;
24792 break;
24793 case E_V4SImode:
24794 if (unsigned_p)
24795 unpack = gen_sse4_1_zero_extendv2siv2di2;
24796 else
24797 unpack = gen_sse4_1_sign_extendv2siv2di2;
24798 break;
24799 default:
24800 gcc_unreachable ();
24801 }
24802
24803 if (GET_MODE_SIZE (imode) >= 32)
24804 {
24805 tmp = gen_reg_rtx (halfmode);
24806 emit_insn (extract (tmp, src));
24807 }
24808 else if (high_p)
24809 {
24810 /* Shift higher 8 bytes to lower 8 bytes. */
24811 tmp = gen_reg_rtx (V1TImode);
24812 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
24813 GEN_INT (64)));
24814 tmp = gen_lowpart (imode, tmp);
24815 }
24816 else
24817 tmp = src;
24818
24819 emit_insn (unpack (dest, tmp));
24820 }
24821 else
24822 {
24823 rtx (*unpack)(rtx, rtx, rtx);
24824
24825 switch (imode)
24826 {
24827 case E_V16QImode:
24828 if (high_p)
24829 unpack = gen_vec_interleave_highv16qi;
24830 else
24831 unpack = gen_vec_interleave_lowv16qi;
24832 break;
24833 case E_V8HImode:
24834 if (high_p)
24835 unpack = gen_vec_interleave_highv8hi;
24836 else
24837 unpack = gen_vec_interleave_lowv8hi;
24838 break;
24839 case E_V4SImode:
24840 if (high_p)
24841 unpack = gen_vec_interleave_highv4si;
24842 else
24843 unpack = gen_vec_interleave_lowv4si;
24844 break;
24845 default:
24846 gcc_unreachable ();
24847 }
24848
24849 if (unsigned_p)
24850 tmp = force_reg (imode, CONST0_RTX (imode));
24851 else
24852 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24853 src, pc_rtx, pc_rtx);
24854
24855 rtx tmp2 = gen_reg_rtx (imode);
24856 emit_insn (unpack (tmp2, src, tmp));
24857 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
24858 }
24859 }
24860
24861 /* Expand conditional increment or decrement using adc/sbb instructions.
24862 The default case using setcc followed by the conditional move can be
24863 done by generic code. */
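/* Only adjustments by +1 or -1 are handled here: the condition is turned
   into a carry-flag comparison (ix86_expand_carry_flag_compare) and the
   result is computed with a single adc or sbb against a 0 or -1
   immediate. */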
24864 bool
24865 ix86_expand_int_addcc (rtx operands[])
24866 {
24867 enum rtx_code code = GET_CODE (operands[1]);
24868 rtx flags;
24869 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24870 rtx compare_op;
24871 rtx val = const0_rtx;
24872 bool fpcmp = false;
24873 machine_mode mode;
24874 rtx op0 = XEXP (operands[1], 0);
24875 rtx op1 = XEXP (operands[1], 1);
24876
24877 if (operands[3] != const1_rtx
24878 && operands[3] != constm1_rtx)
24879 return false;
24880 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24881 return false;
24882 code = GET_CODE (compare_op);
24883
24884 flags = XEXP (compare_op, 0);
24885
24886 if (GET_MODE (flags) == CCFPmode)
24887 {
24888 fpcmp = true;
24889 code = ix86_fp_compare_code_to_integer (code);
24890 }
24891
24892 if (code != LTU)
24893 {
24894 val = constm1_rtx;
24895 if (fpcmp)
24896 PUT_CODE (compare_op,
24897 reverse_condition_maybe_unordered
24898 (GET_CODE (compare_op)));
24899 else
24900 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24901 }
24902
24903 mode = GET_MODE (operands[0]);
24904
24905 /* Construct either adc or sbb insn. */
24906 if ((code == LTU) == (operands[3] == constm1_rtx))
24907 {
24908 switch (mode)
24909 {
24910 case E_QImode:
24911 insn = gen_subqi3_carry;
24912 break;
24913 case E_HImode:
24914 insn = gen_subhi3_carry;
24915 break;
24916 case E_SImode:
24917 insn = gen_subsi3_carry;
24918 break;
24919 case E_DImode:
24920 insn = gen_subdi3_carry;
24921 break;
24922 default:
24923 gcc_unreachable ();
24924 }
24925 }
24926 else
24927 {
24928 switch (mode)
24929 {
24930 case E_QImode:
24931 insn = gen_addqi3_carry;
24932 break;
24933 case E_HImode:
24934 insn = gen_addhi3_carry;
24935 break;
24936 case E_SImode:
24937 insn = gen_addsi3_carry;
24938 break;
24939 case E_DImode:
24940 insn = gen_adddi3_carry;
24941 break;
24942 default:
24943 gcc_unreachable ();
24944 }
24945 }
24946 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24947
24948 return true;
24949 }
24950
24951
24952 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24953 but works for floating point operands and non-offsettable memories.
24954 For pushes, it returns just stack offsets; the values will be saved
24955 in the right order. At most four parts are generated. */
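/* For example, on a 32-bit target a DFmode operand is split into two SImode
   parts and an XFmode operand into three; a 64-bit target splits XFmode and
   TFmode into a DImode part plus an SImode (XFmode) or DImode (TFmode)
   upper part. */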
24956
24957 static int
24958 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
24959 {
24960 int size;
24961
24962 if (!TARGET_64BIT)
24963 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24964 else
24965 size = (GET_MODE_SIZE (mode) + 4) / 8;
24966
24967 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24968 gcc_assert (size >= 2 && size <= 4);
24969
24970 /* Optimize constant pool references to immediates. This is used by fp
24971 moves, which force all constants to memory to allow combining. */
24972 if (MEM_P (operand) && MEM_READONLY_P (operand))
24973 operand = avoid_constant_pool_reference (operand);
24974
24975 if (MEM_P (operand) && !offsettable_memref_p (operand))
24976 {
24977 /* The only non-offsettable memories we handle are pushes. */
24978 int ok = push_operand (operand, VOIDmode);
24979
24980 gcc_assert (ok);
24981
24982 operand = copy_rtx (operand);
24983 PUT_MODE (operand, word_mode);
24984 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24985 return size;
24986 }
24987
24988 if (GET_CODE (operand) == CONST_VECTOR)
24989 {
24990 scalar_int_mode imode = int_mode_for_mode (mode).require ();
24991 /* Caution: if we looked through a constant pool memory above,
24992 the operand may actually have a different mode now. That's
24993 ok, since we want to pun this all the way back to an integer. */
24994 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24995 gcc_assert (operand != NULL);
24996 mode = imode;
24997 }
24998
24999 if (!TARGET_64BIT)
25000 {
25001 if (mode == DImode)
25002 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25003 else
25004 {
25005 int i;
25006
25007 if (REG_P (operand))
25008 {
25009 gcc_assert (reload_completed);
25010 for (i = 0; i < size; i++)
25011 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
25012 }
25013 else if (offsettable_memref_p (operand))
25014 {
25015 operand = adjust_address (operand, SImode, 0);
25016 parts[0] = operand;
25017 for (i = 1; i < size; i++)
25018 parts[i] = adjust_address (operand, SImode, 4 * i);
25019 }
25020 else if (CONST_DOUBLE_P (operand))
25021 {
25022 const REAL_VALUE_TYPE *r;
25023 long l[4];
25024
25025 r = CONST_DOUBLE_REAL_VALUE (operand);
25026 switch (mode)
25027 {
25028 case E_TFmode:
25029 real_to_target (l, r, mode);
25030 parts[3] = gen_int_mode (l[3], SImode);
25031 parts[2] = gen_int_mode (l[2], SImode);
25032 break;
25033 case E_XFmode:
25034 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
25035 long double may not be 80-bit. */
25036 real_to_target (l, r, mode);
25037 parts[2] = gen_int_mode (l[2], SImode);
25038 break;
25039 case E_DFmode:
25040 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
25041 break;
25042 default:
25043 gcc_unreachable ();
25044 }
25045 parts[1] = gen_int_mode (l[1], SImode);
25046 parts[0] = gen_int_mode (l[0], SImode);
25047 }
25048 else
25049 gcc_unreachable ();
25050 }
25051 }
25052 else
25053 {
25054 if (mode == TImode)
25055 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25056 if (mode == XFmode || mode == TFmode)
25057 {
25058 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
25059 if (REG_P (operand))
25060 {
25061 gcc_assert (reload_completed);
25062 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
25063 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
25064 }
25065 else if (offsettable_memref_p (operand))
25066 {
25067 operand = adjust_address (operand, DImode, 0);
25068 parts[0] = operand;
25069 parts[1] = adjust_address (operand, upper_mode, 8);
25070 }
25071 else if (CONST_DOUBLE_P (operand))
25072 {
25073 long l[4];
25074
25075 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
25076
25077 /* real_to_target puts 32-bit pieces in each long. */
25078 parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
25079 | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
25080 << 32), DImode);
25081
25082 if (upper_mode == SImode)
25083 parts[1] = gen_int_mode (l[2], SImode);
25084 else
25085 parts[1]
25086 = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
25087 | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
25088 << 32), DImode);
25089 }
25090 else
25091 gcc_unreachable ();
25092 }
25093 }
25094
25095 return size;
25096 }
25097
25098 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
25099 Return false when normal moves are needed; true when all required
25100 insns have been emitted. Operands 2-4 contain the input values
25101 in the correct order; operands 5-7 contain the output values. */
25102
25103 void
25104 ix86_split_long_move (rtx operands[])
25105 {
25106 rtx part[2][4];
25107 int nparts, i, j;
25108 int push = 0;
25109 int collisions = 0;
25110 machine_mode mode = GET_MODE (operands[0]);
25111 bool collisionparts[4];
25112
25113 /* The DFmode expanders may ask us to move a double.
25114 For a 64-bit target this is a single move. By hiding the fact
25115 here we simplify i386.md splitters. */
25116 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
25117 {
25118 /* Optimize constant pool references to immediates. This is used by
25119 fp moves, which force all constants to memory to allow combining. */
25120
25121 if (MEM_P (operands[1])
25122 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
25123 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
25124 operands[1] = get_pool_constant (XEXP (operands[1], 0));
25125 if (push_operand (operands[0], VOIDmode))
25126 {
25127 operands[0] = copy_rtx (operands[0]);
25128 PUT_MODE (operands[0], word_mode);
25129 }
25130 else
25131 operands[0] = gen_lowpart (DImode, operands[0]);
25132 operands[1] = gen_lowpart (DImode, operands[1]);
25133 emit_move_insn (operands[0], operands[1]);
25134 return;
25135 }
25136
25137 /* The only non-offsettable memory we handle is push. */
25138 if (push_operand (operands[0], VOIDmode))
25139 push = 1;
25140 else
25141 gcc_assert (!MEM_P (operands[0])
25142 || offsettable_memref_p (operands[0]));
25143
25144 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
25145 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
25146
25147 /* When emitting push, take care for source operands on the stack. */
25148 if (push && MEM_P (operands[1])
25149 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
25150 {
25151 rtx src_base = XEXP (part[1][nparts - 1], 0);
25152
25153 /* Compensate for the stack decrement by 4. */
25154 if (!TARGET_64BIT && nparts == 3
25155 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
25156 src_base = plus_constant (Pmode, src_base, 4);
25157
25158 /* src_base refers to the stack pointer and is
25159 automatically decreased by emitted push. */
25160 for (i = 0; i < nparts; i++)
25161 part[1][i] = change_address (part[1][i],
25162 GET_MODE (part[1][i]), src_base);
25163 }
25164
25165 /* We need to do the copy in the right order in case an address register
25166 of the source overlaps the destination. */
25167 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
25168 {
25169 rtx tmp;
25170
25171 for (i = 0; i < nparts; i++)
25172 {
25173 collisionparts[i]
25174 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
25175 if (collisionparts[i])
25176 collisions++;
25177 }
25178
25179 /* Collision in the middle part can be handled by reordering. */
25180 if (collisions == 1 && nparts == 3 && collisionparts [1])
25181 {
25182 std::swap (part[0][1], part[0][2]);
25183 std::swap (part[1][1], part[1][2]);
25184 }
25185 else if (collisions == 1
25186 && nparts == 4
25187 && (collisionparts [1] || collisionparts [2]))
25188 {
25189 if (collisionparts [1])
25190 {
25191 std::swap (part[0][1], part[0][2]);
25192 std::swap (part[1][1], part[1][2]);
25193 }
25194 else
25195 {
25196 std::swap (part[0][2], part[0][3]);
25197 std::swap (part[1][2], part[1][3]);
25198 }
25199 }
25200
25201 /* If there are more collisions, we can't handle it by reordering.
25202 Do an lea to the last part and use only one colliding move. */
25203 else if (collisions > 1)
25204 {
25205 rtx base, addr;
25206
25207 collisions = 1;
25208
25209 base = part[0][nparts - 1];
25210
25211 /* Handle the case when the last part isn't valid for lea.
25212 Happens in 64-bit mode storing the 12-byte XFmode. */
25213 if (GET_MODE (base) != Pmode)
25214 base = gen_rtx_REG (Pmode, REGNO (base));
25215
25216 addr = XEXP (part[1][0], 0);
25217 if (TARGET_TLS_DIRECT_SEG_REFS)
25218 {
25219 struct ix86_address parts;
25220 int ok = ix86_decompose_address (addr, &parts);
25221 gcc_assert (ok);
25222 /* It is not valid to use %gs: or %fs: in lea. */
25223 gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
25224 }
25225 emit_insn (gen_rtx_SET (base, addr));
25226 part[1][0] = replace_equiv_address (part[1][0], base);
25227 for (i = 1; i < nparts; i++)
25228 {
25229 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
25230 part[1][i] = replace_equiv_address (part[1][i], tmp);
25231 }
25232 }
25233 }
25234
25235 if (push)
25236 {
25237 if (!TARGET_64BIT)
25238 {
25239 if (nparts == 3)
25240 {
25241 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
25242 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
25243 stack_pointer_rtx, GEN_INT (-4)));
25244 emit_move_insn (part[0][2], part[1][2]);
25245 }
25246 else if (nparts == 4)
25247 {
25248 emit_move_insn (part[0][3], part[1][3]);
25249 emit_move_insn (part[0][2], part[1][2]);
25250 }
25251 }
25252 else
25253 {
25254 /* In 64-bit mode we don't have a 32-bit push available. If this is
25255 a register, that is OK - we will just use the larger counterpart. We
25256 also retype memory - this comes from an attempt to avoid the REX
25257 prefix on a move of the second half of a TFmode value. */
25258 if (GET_MODE (part[1][1]) == SImode)
25259 {
25260 switch (GET_CODE (part[1][1]))
25261 {
25262 case MEM:
25263 part[1][1] = adjust_address (part[1][1], DImode, 0);
25264 break;
25265
25266 case REG:
25267 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
25268 break;
25269
25270 default:
25271 gcc_unreachable ();
25272 }
25273
25274 if (GET_MODE (part[1][0]) == SImode)
25275 part[1][0] = part[1][1];
25276 }
25277 }
25278 emit_move_insn (part[0][1], part[1][1]);
25279 emit_move_insn (part[0][0], part[1][0]);
25280 return;
25281 }
25282
25283 /* Choose correct order to not overwrite the source before it is copied. */
25284 if ((REG_P (part[0][0])
25285 && REG_P (part[1][1])
25286 && (REGNO (part[0][0]) == REGNO (part[1][1])
25287 || (nparts == 3
25288 && REGNO (part[0][0]) == REGNO (part[1][2]))
25289 || (nparts == 4
25290 && REGNO (part[0][0]) == REGNO (part[1][3]))))
25291 || (collisions > 0
25292 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
25293 {
25294 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
25295 {
25296 operands[2 + i] = part[0][j];
25297 operands[6 + i] = part[1][j];
25298 }
25299 }
25300 else
25301 {
25302 for (i = 0; i < nparts; i++)
25303 {
25304 operands[2 + i] = part[0][i];
25305 operands[6 + i] = part[1][i];
25306 }
25307 }
25308
25309 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
25310 if (optimize_insn_for_size_p ())
25311 {
25312 for (j = 0; j < nparts - 1; j++)
25313 if (CONST_INT_P (operands[6 + j])
25314 && operands[6 + j] != const0_rtx
25315 && REG_P (operands[2 + j]))
25316 for (i = j; i < nparts - 1; i++)
25317 if (CONST_INT_P (operands[7 + i])
25318 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
25319 operands[7 + i] = operands[2 + j];
25320 }
25321
25322 for (i = 0; i < nparts; i++)
25323 emit_move_insn (operands[2 + i], operands[6 + i]);
25324
25325 return;
25326 }
25327
25328 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
25329 left shift by a constant, either using a single shift or
25330 a sequence of add instructions. */
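/* A shift by 1 is always emitted as operand += operand; larger counts use
   repeated additions only when they are no more costly than a constant
   shift and we are not optimizing for size, otherwise a single
   shift-by-constant is emitted. */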
25331
25332 static void
25333 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
25334 {
25335 rtx (*insn)(rtx, rtx, rtx);
25336
25337 if (count == 1
25338 || (count * ix86_cost->add <= ix86_cost->shift_const
25339 && !optimize_insn_for_size_p ()))
25340 {
25341 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
25342 while (count-- > 0)
25343 emit_insn (insn (operand, operand, operand));
25344 }
25345 else
25346 {
25347 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25348 emit_insn (insn (operand, operand, GEN_INT (count)));
25349 }
25350 }
25351
25352 void
25353 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
25354 {
25355 rtx (*gen_ashl3)(rtx, rtx, rtx);
25356 rtx (*gen_shld)(rtx, rtx, rtx);
25357 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25358
25359 rtx low[2], high[2];
25360 int count;
25361
25362 if (CONST_INT_P (operands[2]))
25363 {
25364 split_double_mode (mode, operands, 2, low, high);
25365 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25366
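/* A constant shift of at least half the width moves the low half into the
   high half (shifting it further as needed) and clears the low half;
   smaller constant shifts use shld on the high half plus
   ix86_expand_ashl_const on the low half. */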
25367 if (count >= half_width)
25368 {
25369 emit_move_insn (high[0], low[1]);
25370 emit_move_insn (low[0], const0_rtx);
25371
25372 if (count > half_width)
25373 ix86_expand_ashl_const (high[0], count - half_width, mode);
25374 }
25375 else
25376 {
25377 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25378
25379 if (!rtx_equal_p (operands[0], operands[1]))
25380 emit_move_insn (operands[0], operands[1]);
25381
25382 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
25383 ix86_expand_ashl_const (low[0], count, mode);
25384 }
25385 return;
25386 }
25387
25388 split_double_mode (mode, operands, 1, low, high);
25389
25390 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25391
25392 if (operands[1] == const1_rtx)
25393 {
25394 /* Assuming we've chosen QImode-capable registers, then 1 << N
25395 can be done with two 32/64-bit shifts, no branches, no cmoves. */
25396 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
25397 {
25398 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
25399
25400 ix86_expand_clear (low[0]);
25401 ix86_expand_clear (high[0]);
25402 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
25403
25404 d = gen_lowpart (QImode, low[0]);
25405 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25406 s = gen_rtx_EQ (QImode, flags, const0_rtx);
25407 emit_insn (gen_rtx_SET (d, s));
25408
25409 d = gen_lowpart (QImode, high[0]);
25410 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25411 s = gen_rtx_NE (QImode, flags, const0_rtx);
25412 emit_insn (gen_rtx_SET (d, s));
25413 }
25414
25415 /* Otherwise, we can get the same results by manually performing
25416 a bit extract operation on bit 5/6, and then performing the two
25417 shifts. The two methods of getting 0/1 into low/high are exactly
25418 the same size. Avoiding the shift in the bit extract case helps
25419 pentium4 a bit; no one else seems to care much either way. */
25420 else
25421 {
25422 machine_mode half_mode;
25423 rtx (*gen_lshr3)(rtx, rtx, rtx);
25424 rtx (*gen_and3)(rtx, rtx, rtx);
25425 rtx (*gen_xor3)(rtx, rtx, rtx);
25426 HOST_WIDE_INT bits;
25427 rtx x;
25428
25429 if (mode == DImode)
25430 {
25431 half_mode = SImode;
25432 gen_lshr3 = gen_lshrsi3;
25433 gen_and3 = gen_andsi3;
25434 gen_xor3 = gen_xorsi3;
25435 bits = 5;
25436 }
25437 else
25438 {
25439 half_mode = DImode;
25440 gen_lshr3 = gen_lshrdi3;
25441 gen_and3 = gen_anddi3;
25442 gen_xor3 = gen_xordi3;
25443 bits = 6;
25444 }
25445
25446 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
25447 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
25448 else
25449 x = gen_lowpart (half_mode, operands[2]);
25450 emit_insn (gen_rtx_SET (high[0], x));
25451
25452 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
25453 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
25454 emit_move_insn (low[0], high[0]);
25455 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
25456 }
25457
25458 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25459 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
25460 return;
25461 }
25462
25463 if (operands[1] == constm1_rtx)
25464 {
25465 /* For -1 << N, we can avoid the shld instruction, because we
25466 know that we're shifting 0...31/63 ones into a -1. */
25467 emit_move_insn (low[0], constm1_rtx);
25468 if (optimize_insn_for_size_p ())
25469 emit_move_insn (high[0], low[0]);
25470 else
25471 emit_move_insn (high[0], constm1_rtx);
25472 }
25473 else
25474 {
25475 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25476
25477 if (!rtx_equal_p (operands[0], operands[1]))
25478 emit_move_insn (operands[0], operands[1]);
25479
25480 split_double_mode (mode, operands, 1, low, high);
25481 emit_insn (gen_shld (high[0], low[0], operands[2]));
25482 }
25483
25484 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25485
25486 if (TARGET_CMOVE && scratch)
25487 {
25488 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25489 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25490
25491 ix86_expand_clear (scratch);
25492 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
25493 }
25494 else
25495 {
25496 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25497 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25498
25499 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
25500 }
25501 }
25502
25503 void
25504 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
25505 {
25506 rtx (*gen_ashr3)(rtx, rtx, rtx)
25507 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
25508 rtx (*gen_shrd)(rtx, rtx, rtx);
25509 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25510
25511 rtx low[2], high[2];
25512 int count;
25513
25514 if (CONST_INT_P (operands[2]))
25515 {
25516 split_double_mode (mode, operands, 2, low, high);
25517 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25518
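/* A constant shift by the full width minus one just replicates the sign
   bit into both halves; other shifts of at least half the width move the
   high half down into the low half (shifting it further as needed) and
   fill the high half with the sign; smaller shifts use shrd plus an
   arithmetic shift of the high half. */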
25519 if (count == GET_MODE_BITSIZE (mode) - 1)
25520 {
25521 emit_move_insn (high[0], high[1]);
25522 emit_insn (gen_ashr3 (high[0], high[0],
25523 GEN_INT (half_width - 1)));
25524 emit_move_insn (low[0], high[0]);
25525
25526 }
25527 else if (count >= half_width)
25528 {
25529 emit_move_insn (low[0], high[1]);
25530 emit_move_insn (high[0], low[0]);
25531 emit_insn (gen_ashr3 (high[0], high[0],
25532 GEN_INT (half_width - 1)));
25533
25534 if (count > half_width)
25535 emit_insn (gen_ashr3 (low[0], low[0],
25536 GEN_INT (count - half_width)));
25537 }
25538 else
25539 {
25540 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25541
25542 if (!rtx_equal_p (operands[0], operands[1]))
25543 emit_move_insn (operands[0], operands[1]);
25544
25545 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25546 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
25547 }
25548 }
25549 else
25550 {
25551 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25552
25553 if (!rtx_equal_p (operands[0], operands[1]))
25554 emit_move_insn (operands[0], operands[1]);
25555
25556 split_double_mode (mode, operands, 1, low, high);
25557
25558 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25559 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
25560
25561 if (TARGET_CMOVE && scratch)
25562 {
25563 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25564 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25565
25566 emit_move_insn (scratch, high[0]);
25567 emit_insn (gen_ashr3 (scratch, scratch,
25568 GEN_INT (half_width - 1)));
25569 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25570 scratch));
25571 }
25572 else
25573 {
25574 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
25575 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
25576
25577 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
25578 }
25579 }
25580 }
25581
25582 void
25583 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
25584 {
25585 rtx (*gen_lshr3)(rtx, rtx, rtx)
25586 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
25587 rtx (*gen_shrd)(rtx, rtx, rtx);
25588 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25589
25590 rtx low[2], high[2];
25591 int count;
25592
25593 if (CONST_INT_P (operands[2]))
25594 {
25595 split_double_mode (mode, operands, 2, low, high);
25596 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25597
25598 if (count >= half_width)
25599 {
25600 emit_move_insn (low[0], high[1]);
25601 ix86_expand_clear (high[0]);
25602
25603 if (count > half_width)
25604 emit_insn (gen_lshr3 (low[0], low[0],
25605 GEN_INT (count - half_width)));
25606 }
25607 else
25608 {
25609 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25610
25611 if (!rtx_equal_p (operands[0], operands[1]))
25612 emit_move_insn (operands[0], operands[1]);
25613
25614 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25615 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
25616 }
25617 }
25618 else
25619 {
25620 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25621
25622 if (!rtx_equal_p (operands[0], operands[1]))
25623 emit_move_insn (operands[0], operands[1]);
25624
25625 split_double_mode (mode, operands, 1, low, high);
25626
25627 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25628 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
25629
25630 if (TARGET_CMOVE && scratch)
25631 {
25632 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25633 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25634
25635 ix86_expand_clear (scratch);
25636 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25637 scratch));
25638 }
25639 else
25640 {
25641 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25642 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25643
25644 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
25645 }
25646 }
25647 }
25648
25649 /* Predict just emitted jump instruction to be taken with probability PROB. */
25650 static void
25651 predict_jump (int prob)
25652 {
25653 rtx_insn *insn = get_last_insn ();
25654 gcc_assert (JUMP_P (insn));
25655 add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
25656 }
25657
25658 /* Helper function for the string operations below. Test whether VARIABLE
25659 is aligned to VALUE bytes; if it is, jump to the returned label. */
25660 static rtx_code_label *
25661 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
25662 {
25663 rtx_code_label *label = gen_label_rtx ();
25664 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
25665 if (GET_MODE (variable) == DImode)
25666 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
25667 else
25668 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
25669 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
25670 1, label);
25671 if (epilogue)
25672 predict_jump (REG_BR_PROB_BASE * 50 / 100);
25673 else
25674 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25675 return label;
25676 }
25677
25678 /* Adjust COUNTER by the VALUE. */
25679 static void
25680 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
25681 {
25682 rtx (*gen_add)(rtx, rtx, rtx)
25683 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
25684
25685 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
25686 }
25687
25688 /* Zero extend possibly SImode EXP to Pmode register. */
25689 rtx
25690 ix86_zero_extend_to_Pmode (rtx exp)
25691 {
25692 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
25693 }
25694
25695 /* Divide COUNTREG by SCALE. */
25696 static rtx
25697 scale_counter (rtx countreg, int scale)
25698 {
25699 rtx sc;
25700
25701 if (scale == 1)
25702 return countreg;
25703 if (CONST_INT_P (countreg))
25704 return GEN_INT (INTVAL (countreg) / scale);
25705 gcc_assert (REG_P (countreg));
25706
25707 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
25708 GEN_INT (exact_log2 (scale)),
25709 NULL, 1, OPTAB_DIRECT);
25710 return sc;
25711 }
25712
25713 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
25714 DImode for constant loop counts. */
25715
25716 static machine_mode
25717 counter_mode (rtx count_exp)
25718 {
25719 if (GET_MODE (count_exp) != VOIDmode)
25720 return GET_MODE (count_exp);
25721 if (!CONST_INT_P (count_exp))
25722 return Pmode;
25723 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
25724 return DImode;
25725 return SImode;
25726 }
25727
25728 /* Copy the address to a Pmode register. This is used for x32 to
25729 truncate DImode TLS address to a SImode register. */
25730
25731 static rtx
25732 ix86_copy_addr_to_reg (rtx addr)
25733 {
25734 rtx reg;
25735 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
25736 {
25737 reg = copy_addr_to_reg (addr);
25738 REG_POINTER (reg) = 1;
25739 return reg;
25740 }
25741 else
25742 {
25743 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
25744 reg = copy_to_mode_reg (DImode, addr);
25745 REG_POINTER (reg) = 1;
25746 return gen_rtx_SUBREG (SImode, reg, 0);
25747 }
25748 }
25749
25750 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
25751 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
25752 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
25753 equivalent loop to set memory by VALUE (supposed to be in MODE).
25754 
25755 The size is rounded down to a whole number of chunks moved at once.
25756 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
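/* The emitted code has roughly this shape (the SRC parts only when
   !ISSETMEM):
     size = COUNT & -(UNROLL * chunk size); iter = 0;
     do { move (or store VALUE into) UNROLL chunks at DESTMEM + iter
          [from SRCMEM + iter]; iter += UNROLL * chunk size; }
     while (iter < size);
     DESTPTR += iter; [SRCPTR += iter;]
   When the step is a single byte, a zero-size check is emitted before
   the loop. */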
25757
25758
25759 static void
25760 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
25761 rtx destptr, rtx srcptr, rtx value,
25762 rtx count, machine_mode mode, int unroll,
25763 int expected_size, bool issetmem)
25764 {
25765 rtx_code_label *out_label, *top_label;
25766 rtx iter, tmp;
25767 machine_mode iter_mode = counter_mode (count);
25768 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
25769 rtx piece_size = GEN_INT (piece_size_n);
25770 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
25771 rtx size;
25772 int i;
25773
25774 top_label = gen_label_rtx ();
25775 out_label = gen_label_rtx ();
25776 iter = gen_reg_rtx (iter_mode);
25777
25778 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
25779 NULL, 1, OPTAB_DIRECT);
25780 /* Those two should combine. */
25781 if (piece_size == const1_rtx)
25782 {
25783 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
25784 true, out_label);
25785 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25786 }
25787 emit_move_insn (iter, const0_rtx);
25788
25789 emit_label (top_label);
25790
25791 tmp = convert_modes (Pmode, iter_mode, iter, true);
25792
25793 /* This assert could be relaxed - in that case we'd need to compute the
25794 smallest power of two containing PIECE_SIZE_N and pass it to
25795 offset_address. */
25796 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
25797 destmem = offset_address (destmem, tmp, piece_size_n);
25798 destmem = adjust_address (destmem, mode, 0);
25799
25800 if (!issetmem)
25801 {
25802 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
25803 srcmem = adjust_address (srcmem, mode, 0);
25804
25805 /* When unrolling for chips that reorder memory reads and writes,
25806 we can save registers by using a single temporary.
25807 Also, using 4 temporaries is overkill in 32-bit mode. */
25808 if (!TARGET_64BIT && 0)
25809 {
25810 for (i = 0; i < unroll; i++)
25811 {
25812 if (i)
25813 {
25814 destmem =
25815 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25816 srcmem =
25817 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25818 }
25819 emit_move_insn (destmem, srcmem);
25820 }
25821 }
25822 else
25823 {
25824 rtx tmpreg[4];
25825 gcc_assert (unroll <= 4);
25826 for (i = 0; i < unroll; i++)
25827 {
25828 tmpreg[i] = gen_reg_rtx (mode);
25829 if (i)
25830 {
25831 srcmem =
25832 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25833 }
25834 emit_move_insn (tmpreg[i], srcmem);
25835 }
25836 for (i = 0; i < unroll; i++)
25837 {
25838 if (i)
25839 {
25840 destmem =
25841 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25842 }
25843 emit_move_insn (destmem, tmpreg[i]);
25844 }
25845 }
25846 }
25847 else
25848 for (i = 0; i < unroll; i++)
25849 {
25850 if (i)
25851 destmem =
25852 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25853 emit_move_insn (destmem, value);
25854 }
25855
25856 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25857 true, OPTAB_LIB_WIDEN);
25858 if (tmp != iter)
25859 emit_move_insn (iter, tmp);
25860
25861 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
25862 true, top_label);
25863 if (expected_size != -1)
25864 {
25865 expected_size /= GET_MODE_SIZE (mode) * unroll;
25866 if (expected_size == 0)
25867 predict_jump (0);
25868 else if (expected_size > REG_BR_PROB_BASE)
25869 predict_jump (REG_BR_PROB_BASE - 1);
25870 else
25871 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25872 }
25873 else
25874 predict_jump (REG_BR_PROB_BASE * 80 / 100);
25875 iter = ix86_zero_extend_to_Pmode (iter);
25876 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25877 true, OPTAB_LIB_WIDEN);
25878 if (tmp != destptr)
25879 emit_move_insn (destptr, tmp);
25880 if (!issetmem)
25881 {
25882 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25883 true, OPTAB_LIB_WIDEN);
25884 if (tmp != srcptr)
25885 emit_move_insn (srcptr, tmp);
25886 }
25887 emit_label (out_label);
25888 }
25889
25890 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25891 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25892 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25893 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
25894 ORIG_VALUE is the original value passed to memset to fill the memory with.
25895 Other arguments have the same meaning as for the previous function. */
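/* An illustrative example (assumed, simplified shape of the output): for a
   memcpy with MODE == SImode this expands to roughly

     countreg = count >> 2;             (scaled by scale_counter)
     rep movs of SImode chunks          (advances destptr/srcptr by 4*countreg)

   with DESTEXP/SRCEXP describing the final pointer values for the insn
   pattern.  */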
25896
25897 static void
25898 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25899 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25900 rtx count,
25901 machine_mode mode, bool issetmem)
25902 {
25903 rtx destexp;
25904 rtx srcexp;
25905 rtx countreg;
25906 HOST_WIDE_INT rounded_count;
25907
25908 /* If possible, it is shorter to use rep movs.
25909 TODO: Maybe it is better to move this logic to decide_alg. */
25910 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25911 && (!issetmem || orig_value == const0_rtx))
25912 mode = SImode;
25913
25914 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25915 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
25916
25917 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25918 GET_MODE_SIZE (mode)));
25919 if (mode != QImode)
25920 {
25921 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25922 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25923 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25924 }
25925 else
25926 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
25927 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25928 {
25929 rounded_count
25930 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25931 destmem = shallow_copy_rtx (destmem);
25932 set_mem_size (destmem, rounded_count);
25933 }
25934 else if (MEM_SIZE_KNOWN_P (destmem))
25935 clear_mem_size (destmem);
25936
25937 if (issetmem)
25938 {
25939 value = force_reg (mode, gen_lowpart (mode, value));
25940 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
25941 }
25942 else
25943 {
25944 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25945 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25946 if (mode != QImode)
25947 {
25948 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25949 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25950 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25951 }
25952 else
25953 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25954 if (CONST_INT_P (count))
25955 {
25956 rounded_count
25957 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25958 srcmem = shallow_copy_rtx (srcmem);
25959 set_mem_size (srcmem, rounded_count);
25960 }
25961 else
25962 {
25963 if (MEM_SIZE_KNOWN_P (srcmem))
25964 clear_mem_size (srcmem);
25965 }
25966 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25967 destexp, srcexp));
25968 }
25969 }
25970
25971 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25972 DESTMEM.
25973 SRCMEM is passed by pointer so it can be updated on return.
25974 The return value is the updated DESTMEM. */
25975 static rtx
25976 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25977 HOST_WIDE_INT size_to_move)
25978 {
25979 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25980 enum insn_code code;
25981 machine_mode move_mode;
25982 int piece_size, i;
25983
25984 /* Find the widest mode in which we could perform moves.
25985 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
25986 it until a move of that size is supported. */
25987 piece_size = 1 << floor_log2 (size_to_move);
25988 while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
25989 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
25990 {
25991 gcc_assert (piece_size > 1);
25992 piece_size >>= 1;
25993 }
25994
25995 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25996 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25997 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25998 {
25999 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26000 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
26001 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
26002 {
26003 move_mode = word_mode;
26004 piece_size = GET_MODE_SIZE (move_mode);
26005 code = optab_handler (mov_optab, move_mode);
26006 }
26007 }
26008 gcc_assert (code != CODE_FOR_nothing);
26009
26010 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26011 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
26012
26013 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
26014 gcc_assert (size_to_move % piece_size == 0);
26015 adjust = GEN_INT (piece_size);
26016 for (i = 0; i < size_to_move; i += piece_size)
26017 {
26018 /* We move from memory to memory, so we'll need to do it via
26019 a temporary register. */
26020 tempreg = gen_reg_rtx (move_mode);
26021 emit_insn (GEN_FCN (code) (tempreg, src));
26022 emit_insn (GEN_FCN (code) (dst, tempreg));
26023
26024 emit_move_insn (destptr,
26025 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26026 emit_move_insn (srcptr,
26027 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
26028
26029 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26030 piece_size);
26031 src = adjust_automodify_address_nv (src, move_mode, srcptr,
26032 piece_size);
26033 }
26034
26035 /* Update DST and SRC rtx. */
26036 *srcmem = src;
26037 return dst;
26038 }
26039
26040 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
26041 static void
26042 expand_movmem_epilogue (rtx destmem, rtx srcmem,
26043 rtx destptr, rtx srcptr, rtx count, int max_size)
26044 {
26045 rtx src, dest;
26046 if (CONST_INT_P (count))
26047 {
26048 HOST_WIDE_INT countval = INTVAL (count);
26049 HOST_WIDE_INT epilogue_size = countval % max_size;
26050 int i;
26051
26052 /* For now MAX_SIZE should be a power of 2. This assert could be
26053 relaxed, but it'll require a bit more complicated epilogue
26054 expanding. */
26055 gcc_assert ((max_size & (max_size - 1)) == 0);
26056 for (i = max_size; i >= 1; i >>= 1)
26057 {
26058 if (epilogue_size & i)
26059 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26060 }
26061 return;
26062 }
26063 if (max_size > 8)
26064 {
26065 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
26066 count, 1, OPTAB_DIRECT);
26067 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
26068 count, QImode, 1, 4, false);
26069 return;
26070 }
26071
26072 /* When single string operations are available (TARGET_SINGLE_STRINGOP), we can
26073 cheaply increase the dest and src pointers. Otherwise we save code size by
26074 maintaining an offset (zero is readily available from the preceding rep
26075 operation) and using x86 addressing modes. */
26076 if (TARGET_SINGLE_STRINGOP)
26077 {
26078 if (max_size > 4)
26079 {
26080 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26081 src = change_address (srcmem, SImode, srcptr);
26082 dest = change_address (destmem, SImode, destptr);
26083 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26084 emit_label (label);
26085 LABEL_NUSES (label) = 1;
26086 }
26087 if (max_size > 2)
26088 {
26089 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26090 src = change_address (srcmem, HImode, srcptr);
26091 dest = change_address (destmem, HImode, destptr);
26092 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26093 emit_label (label);
26094 LABEL_NUSES (label) = 1;
26095 }
26096 if (max_size > 1)
26097 {
26098 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26099 src = change_address (srcmem, QImode, srcptr);
26100 dest = change_address (destmem, QImode, destptr);
26101 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26102 emit_label (label);
26103 LABEL_NUSES (label) = 1;
26104 }
26105 }
26106 else
26107 {
26108 rtx offset = force_reg (Pmode, const0_rtx);
26109 rtx tmp;
26110
26111 if (max_size > 4)
26112 {
26113 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26114 src = change_address (srcmem, SImode, srcptr);
26115 dest = change_address (destmem, SImode, destptr);
26116 emit_move_insn (dest, src);
26117 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
26118 true, OPTAB_LIB_WIDEN);
26119 if (tmp != offset)
26120 emit_move_insn (offset, tmp);
26121 emit_label (label);
26122 LABEL_NUSES (label) = 1;
26123 }
26124 if (max_size > 2)
26125 {
26126 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26127 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26128 src = change_address (srcmem, HImode, tmp);
26129 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26130 dest = change_address (destmem, HImode, tmp);
26131 emit_move_insn (dest, src);
26132 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
26133 true, OPTAB_LIB_WIDEN);
26134 if (tmp != offset)
26135 emit_move_insn (offset, tmp);
26136 emit_label (label);
26137 LABEL_NUSES (label) = 1;
26138 }
26139 if (max_size > 1)
26140 {
26141 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26142 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26143 src = change_address (srcmem, QImode, tmp);
26144 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26145 dest = change_address (destmem, QImode, tmp);
26146 emit_move_insn (dest, src);
26147 emit_label (label);
26148 LABEL_NUSES (label) = 1;
26149 }
26150 }
26151 }
26152
26153 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
26154 with the value PROMOTED_VAL.
26155 The return value is the updated DESTMEM. */
26157 static rtx
26158 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
26159 HOST_WIDE_INT size_to_move)
26160 {
26161 rtx dst = destmem, adjust;
26162 enum insn_code code;
26163 machine_mode move_mode;
26164 int piece_size, i;
26165
26166 /* Pick the move mode from PROMOTED_VAL (falling back to QImode), and
26167 narrow it when SIZE_TO_MOVE is smaller than a single chunk of that
26168 mode. */
26169 move_mode = GET_MODE (promoted_val);
26170 if (move_mode == VOIDmode)
26171 move_mode = QImode;
26172 if (size_to_move < GET_MODE_SIZE (move_mode))
26173 {
26174 unsigned int move_bits = size_to_move * BITS_PER_UNIT;
26175 move_mode = int_mode_for_size (move_bits, 0).require ();
26176 promoted_val = gen_lowpart (move_mode, promoted_val);
26177 }
26178 piece_size = GET_MODE_SIZE (move_mode);
26179 code = optab_handler (mov_optab, move_mode);
26180 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
26181
26182 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26183
26184 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
26185 gcc_assert (size_to_move % piece_size == 0);
26186 adjust = GEN_INT (piece_size);
26187 for (i = 0; i < size_to_move; i += piece_size)
26188 {
26189 if (piece_size <= GET_MODE_SIZE (word_mode))
26190 {
26191 emit_insn (gen_strset (destptr, dst, promoted_val));
26192 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26193 piece_size);
26194 continue;
26195 }
26196
26197 emit_insn (GEN_FCN (code) (dst, promoted_val));
26198
26199 emit_move_insn (destptr,
26200 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26201
26202 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26203 piece_size);
26204 }
26205
26206 /* Update DST rtx. */
26207 return dst;
26208 }
26209 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
26210 static void
26211 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
26212 rtx count, int max_size)
26213 {
26214 count =
26215 expand_simple_binop (counter_mode (count), AND, count,
26216 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
26217 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
26218 gen_lowpart (QImode, value), count, QImode,
26219 1, max_size / 2, true);
26220 }
26221
26222 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
26223 static void
26224 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
26225 rtx count, int max_size)
26226 {
26227 rtx dest;
26228
26229 if (CONST_INT_P (count))
26230 {
26231 HOST_WIDE_INT countval = INTVAL (count);
26232 HOST_WIDE_INT epilogue_size = countval % max_size;
26233 int i;
26234
26235 /* For now MAX_SIZE should be a power of 2. This assert could be
26236 relaxed, but it'll require a bit more complicated epilogue
26237 expanding. */
26238 gcc_assert ((max_size & (max_size - 1)) == 0);
26239 for (i = max_size; i >= 1; i >>= 1)
26240 {
26241 if (epilogue_size & i)
26242 {
26243 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26244 destmem = emit_memset (destmem, destptr, vec_value, i);
26245 else
26246 destmem = emit_memset (destmem, destptr, value, i);
26247 }
26248 }
26249 return;
26250 }
26251 if (max_size > 32)
26252 {
26253 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
26254 return;
26255 }
26256 if (max_size > 16)
26257 {
26258 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
26259 if (TARGET_64BIT)
26260 {
26261 dest = change_address (destmem, DImode, destptr);
26262 emit_insn (gen_strset (destptr, dest, value));
26263 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
26264 emit_insn (gen_strset (destptr, dest, value));
26265 }
26266 else
26267 {
26268 dest = change_address (destmem, SImode, destptr);
26269 emit_insn (gen_strset (destptr, dest, value));
26270 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26271 emit_insn (gen_strset (destptr, dest, value));
26272 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
26273 emit_insn (gen_strset (destptr, dest, value));
26274 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
26275 emit_insn (gen_strset (destptr, dest, value));
26276 }
26277 emit_label (label);
26278 LABEL_NUSES (label) = 1;
26279 }
26280 if (max_size > 8)
26281 {
26282 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
26283 if (TARGET_64BIT)
26284 {
26285 dest = change_address (destmem, DImode, destptr);
26286 emit_insn (gen_strset (destptr, dest, value));
26287 }
26288 else
26289 {
26290 dest = change_address (destmem, SImode, destptr);
26291 emit_insn (gen_strset (destptr, dest, value));
26292 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26293 emit_insn (gen_strset (destptr, dest, value));
26294 }
26295 emit_label (label);
26296 LABEL_NUSES (label) = 1;
26297 }
26298 if (max_size > 4)
26299 {
26300 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26301 dest = change_address (destmem, SImode, destptr);
26302 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
26303 emit_label (label);
26304 LABEL_NUSES (label) = 1;
26305 }
26306 if (max_size > 2)
26307 {
26308 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26309 dest = change_address (destmem, HImode, destptr);
26310 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
26311 emit_label (label);
26312 LABEL_NUSES (label) = 1;
26313 }
26314 if (max_size > 1)
26315 {
26316 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26317 dest = change_address (destmem, QImode, destptr);
26318 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
26319 emit_label (label);
26320 LABEL_NUSES (label) = 1;
26321 }
26322 }
26323
26324 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or set
26325 enough bytes of DESTMEM, to align it to DESIRED_ALIGNMENT. The original
26326 alignment is ALIGN. Depending on ISSETMEM, either the SRCMEM/SRCPTR or the
26327 VALUE/VEC_VALUE arguments are ignored.
26328 The return value is the updated DESTMEM. */
26329 static rtx
26330 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
26331 rtx destptr, rtx srcptr, rtx value,
26332 rtx vec_value, rtx count, int align,
26333 int desired_alignment, bool issetmem)
26334 {
26335 int i;
26336 for (i = 1; i < desired_alignment; i <<= 1)
26337 {
26338 if (align <= i)
26339 {
26340 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
26341 if (issetmem)
26342 {
26343 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26344 destmem = emit_memset (destmem, destptr, vec_value, i);
26345 else
26346 destmem = emit_memset (destmem, destptr, value, i);
26347 }
26348 else
26349 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26350 ix86_adjust_counter (count, i);
26351 emit_label (label);
26352 LABEL_NUSES (label) = 1;
26353 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
26354 }
26355 }
26356 return destmem;
26357 }
26358
26359 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
26360 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
26361 and jump to DONE_LABEL. */
26362 static void
26363 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
26364 rtx destptr, rtx srcptr,
26365 rtx value, rtx vec_value,
26366 rtx count, int size,
26367 rtx done_label, bool issetmem)
26368 {
26369 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
26370 machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
26371 rtx modesize;
26372 int n;
26373
26374 /* If we do not have a vector value to copy, we must reduce the size. */
26375 if (issetmem)
26376 {
26377 if (!vec_value)
26378 {
26379 if (GET_MODE (value) == VOIDmode && size > 8)
26380 mode = Pmode;
26381 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
26382 mode = GET_MODE (value);
26383 }
26384 else
26385 mode = GET_MODE (vec_value), value = vec_value;
26386 }
26387 else
26388 {
26389 /* Choose appropriate vector mode. */
26390 if (size >= 32)
26391 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
26392 else if (size >= 16)
26393 mode = TARGET_SSE ? V16QImode : DImode;
26394 srcmem = change_address (srcmem, mode, srcptr);
26395 }
26396 destmem = change_address (destmem, mode, destptr);
26397 modesize = GEN_INT (GET_MODE_SIZE (mode));
26398 gcc_assert (GET_MODE_SIZE (mode) <= size);
26399 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26400 {
26401 if (issetmem)
26402 emit_move_insn (destmem, gen_lowpart (mode, value));
26403 else
26404 {
26405 emit_move_insn (destmem, srcmem);
26406 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26407 }
26408 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26409 }
26410
26411 destmem = offset_address (destmem, count, 1);
26412 destmem = offset_address (destmem, GEN_INT (-2 * size),
26413 GET_MODE_SIZE (mode));
26414 if (!issetmem)
26415 {
26416 srcmem = offset_address (srcmem, count, 1);
26417 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
26418 GET_MODE_SIZE (mode));
26419 }
26420 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26421 {
26422 if (issetmem)
26423 emit_move_insn (destmem, gen_lowpart (mode, value));
26424 else
26425 {
26426 emit_move_insn (destmem, srcmem);
26427 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26428 }
26429 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26430 }
26431 emit_jump_insn (gen_jump (done_label));
26432 emit_barrier ();
26433
26434 emit_label (label);
26435 LABEL_NUSES (label) = 1;
26436 }
26437
26438 /* Handle a small memcpy (up to SIZE bytes, where SIZE is assumed to be a small
26439 power of 2) and get ready for the main memcpy loop by copying the initial
26440 DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/
26441 COUNT so we can proceed with a loop copying SIZE bytes at once. Do moves in MODE.
26442 DONE_LABEL is a label after the whole copying sequence. The label is created
26443 on demand if *DONE_LABEL is NULL.
26444 MIN_SIZE is the minimal size of the block copied. This value gets adjusted
26445 for the new bounds after the initial copies.
26446
26447 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
26448 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
26449 we will dispatch to a library call for large blocks.
26450
26451 In pseudocode we do:
26452
26453 if (COUNT < SIZE)
26454 {
26455 Assume that SIZE is 4. Bigger sizes are handled analogously
26456 if (COUNT & 4)
26457 {
26458 copy 4 bytes from SRCPTR to DESTPTR
26459 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
26460 goto done_label
26461 }
26462 if (!COUNT)
26463 goto done_label;
26464 copy 1 byte from SRCPTR to DESTPTR
26465 if (COUNT & 2)
26466 {
26467 copy 2 bytes from SRCPTR to DESTPTR
26468 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
26469 }
26470 }
26471 else
26472 {
26473 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
26474 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
26475
26476 OLD_DESTPTR = DESTPTR;
26477 Align DESTPTR up to DESIRED_ALIGN
26478 SRCPTR += DESTPTR - OLD_DESTPTR
26479 COUNT -= DESTPTR - OLD_DESTPTR
26480 if (DYNAMIC_CHECK)
26481 Round COUNT down to multiple of SIZE
26482 << optional caller supplied zero size guard is here >>
26483 << optional caller supplied dynamic check is here >>
26484 << caller supplied main copy loop is here >>
26485 }
26486 done_label:
26487 */
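/* A concrete instance of the small-block branch above (illustrative, with an
   assumed COUNT): with SIZE == 4 and a runtime COUNT == 6, the test
   "COUNT & 4" succeeds, so bytes 0..3 and bytes COUNT-4..COUNT-1 (i.e. 2..5)
   are copied; the overlap is harmless and no loop is needed.  */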
26488 static void
26489 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
26490 rtx *destptr, rtx *srcptr,
26491 machine_mode mode,
26492 rtx value, rtx vec_value,
26493 rtx *count,
26494 rtx_code_label **done_label,
26495 int size,
26496 int desired_align,
26497 int align,
26498 unsigned HOST_WIDE_INT *min_size,
26499 bool dynamic_check,
26500 bool issetmem)
26501 {
26502 rtx_code_label *loop_label = NULL, *label;
26503 int n;
26504 rtx modesize;
26505 int prolog_size = 0;
26506 rtx mode_value;
26507
26508 /* Choose the proper value to copy. */
26509 if (issetmem && VECTOR_MODE_P (mode))
26510 mode_value = vec_value;
26511 else
26512 mode_value = value;
26513 gcc_assert (GET_MODE_SIZE (mode) <= size);
26514
26515 /* See if block is big or small, handle small blocks. */
26516 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
26517 {
26518 int size2 = size;
26519 loop_label = gen_label_rtx ();
26520
26521 if (!*done_label)
26522 *done_label = gen_label_rtx ();
26523
26524 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
26525 1, loop_label);
26526 size2 >>= 1;
26527
26528 /* Handle sizes > 3. */
26529 for (;size2 > 2; size2 >>= 1)
26530 expand_small_movmem_or_setmem (destmem, srcmem,
26531 *destptr, *srcptr,
26532 value, vec_value,
26533 *count,
26534 size2, *done_label, issetmem);
26535 /* Nothing to copy? Jump to DONE_LABEL if so. */
26536 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
26537 1, *done_label);
26538
26539 /* Do a byte copy. */
26540 destmem = change_address (destmem, QImode, *destptr);
26541 if (issetmem)
26542 emit_move_insn (destmem, gen_lowpart (QImode, value));
26543 else
26544 {
26545 srcmem = change_address (srcmem, QImode, *srcptr);
26546 emit_move_insn (destmem, srcmem);
26547 }
26548
26549 /* Handle sizes 2 and 3. */
26550 label = ix86_expand_aligntest (*count, 2, false);
26551 destmem = change_address (destmem, HImode, *destptr);
26552 destmem = offset_address (destmem, *count, 1);
26553 destmem = offset_address (destmem, GEN_INT (-2), 2);
26554 if (issetmem)
26555 emit_move_insn (destmem, gen_lowpart (HImode, value));
26556 else
26557 {
26558 srcmem = change_address (srcmem, HImode, *srcptr);
26559 srcmem = offset_address (srcmem, *count, 1);
26560 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
26561 emit_move_insn (destmem, srcmem);
26562 }
26563
26564 emit_label (label);
26565 LABEL_NUSES (label) = 1;
26566 emit_jump_insn (gen_jump (*done_label));
26567 emit_barrier ();
26568 }
26569 else
26570 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
26571 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
26572
26573 /* Start memcpy for COUNT >= SIZE. */
26574 if (loop_label)
26575 {
26576 emit_label (loop_label);
26577 LABEL_NUSES (loop_label) = 1;
26578 }
26579
26580 /* Copy the first DESIRED_ALIGN - ALIGN bytes (rounded up to whole chunks of MODE). */
26581 if (!issetmem)
26582 srcmem = change_address (srcmem, mode, *srcptr);
26583 destmem = change_address (destmem, mode, *destptr);
26584 modesize = GEN_INT (GET_MODE_SIZE (mode));
26585 for (n = 0; prolog_size < desired_align - align; n++)
26586 {
26587 if (issetmem)
26588 emit_move_insn (destmem, mode_value);
26589 else
26590 {
26591 emit_move_insn (destmem, srcmem);
26592 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26593 }
26594 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26595 prolog_size += GET_MODE_SIZE (mode);
26596 }
26597
26598
26599 /* Copy last SIZE bytes. */
26600 destmem = offset_address (destmem, *count, 1);
26601 destmem = offset_address (destmem,
26602 GEN_INT (-size - prolog_size),
26603 1);
26604 if (issetmem)
26605 emit_move_insn (destmem, mode_value);
26606 else
26607 {
26608 srcmem = offset_address (srcmem, *count, 1);
26609 srcmem = offset_address (srcmem,
26610 GEN_INT (-size - prolog_size),
26611 1);
26612 emit_move_insn (destmem, srcmem);
26613 }
26614 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
26615 {
26616 destmem = offset_address (destmem, modesize, 1);
26617 if (issetmem)
26618 emit_move_insn (destmem, mode_value);
26619 else
26620 {
26621 srcmem = offset_address (srcmem, modesize, 1);
26622 emit_move_insn (destmem, srcmem);
26623 }
26624 }
26625
26626 /* Align destination. */
26627 if (desired_align > 1 && desired_align > align)
26628 {
26629 rtx saveddest = *destptr;
26630
26631 gcc_assert (desired_align <= size);
26632 /* Align destptr up, placing the result in a new register. */
26633 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
26634 GEN_INT (prolog_size),
26635 NULL_RTX, 1, OPTAB_DIRECT);
26636 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
26637 REG_POINTER (*destptr) = 1;
26638 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
26639 GEN_INT (-desired_align),
26640 *destptr, 1, OPTAB_DIRECT);
26641 /* See how many bytes we skipped. */
26642 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
26643 *destptr,
26644 saveddest, 1, OPTAB_DIRECT);
26645 /* Adjust srcptr and count. */
26646 if (!issetmem)
26647 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
26648 saveddest, *srcptr, 1, OPTAB_DIRECT);
26649 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26650 saveddest, *count, 1, OPTAB_DIRECT);
26651 /* We copied at most size + prolog_size. */
26652 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
26653 *min_size
26654 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
26655 else
26656 *min_size = 0;
26657
26658 /* Our loops always round down the block size, but for the dispatch to
26659 the library we need the precise value. */
26660 if (dynamic_check)
26661 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
26662 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
26663 }
26664 else
26665 {
26666 gcc_assert (prolog_size == 0);
26667 /* Decrease the count, so we won't end up copying the last word twice. */
26668 if (!CONST_INT_P (*count))
26669 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26670 constm1_rtx, *count, 1, OPTAB_DIRECT);
26671 else
26672 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
26673 (unsigned HOST_WIDE_INT)size));
26674 if (*min_size)
26675 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
26676 }
26677 }
26678
26679
26680 /* This function is like the previous one, except here we know how many bytes
26681 need to be copied. That allows us to update alignment not only of DST, which
26682 is returned, but also of SRC, which is passed as a pointer for that
26683 reason. */
26684 static rtx
26685 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
26686 rtx srcreg, rtx value, rtx vec_value,
26687 int desired_align, int align_bytes,
26688 bool issetmem)
26689 {
26690 rtx src = NULL;
26691 rtx orig_dst = dst;
26692 rtx orig_src = NULL;
26693 int piece_size = 1;
26694 int copied_bytes = 0;
26695
26696 if (!issetmem)
26697 {
26698 gcc_assert (srcp != NULL);
26699 src = *srcp;
26700 orig_src = src;
26701 }
26702
26703 for (piece_size = 1;
26704 piece_size <= desired_align && copied_bytes < align_bytes;
26705 piece_size <<= 1)
26706 {
26707 if (align_bytes & piece_size)
26708 {
26709 if (issetmem)
26710 {
26711 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
26712 dst = emit_memset (dst, destreg, vec_value, piece_size);
26713 else
26714 dst = emit_memset (dst, destreg, value, piece_size);
26715 }
26716 else
26717 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
26718 copied_bytes += piece_size;
26719 }
26720 }
26721 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
26722 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26723 if (MEM_SIZE_KNOWN_P (orig_dst))
26724 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
26725
26726 if (!issetmem)
26727 {
26728 int src_align_bytes = get_mem_align_offset (src, desired_align
26729 * BITS_PER_UNIT);
26730 if (src_align_bytes >= 0)
26731 src_align_bytes = desired_align - src_align_bytes;
26732 if (src_align_bytes >= 0)
26733 {
26734 unsigned int src_align;
26735 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
26736 {
26737 if ((src_align_bytes & (src_align - 1))
26738 == (align_bytes & (src_align - 1)))
26739 break;
26740 }
26741 if (src_align > (unsigned int) desired_align)
26742 src_align = desired_align;
26743 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
26744 set_mem_align (src, src_align * BITS_PER_UNIT);
26745 }
26746 if (MEM_SIZE_KNOWN_P (orig_src))
26747 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
26748 *srcp = src;
26749 }
26750
26751 return dst;
26752 }
26753
26754 /* Return true if ALG can be used in current context.
26755 Assume we expand memset if MEMSET is true. */
26756 static bool
26757 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
26758 {
26759 if (alg == no_stringop)
26760 return false;
26761 if (alg == vector_loop)
26762 return TARGET_SSE || TARGET_AVX;
26763 /* Algorithms using the rep prefix want at least edi and ecx;
26764 additionally, memset wants eax and memcpy wants esi. Don't
26765 consider such algorithms if the user has appropriated those
26766 registers for their own purposes, or if we have a non-default
26767 address space, since some string insns cannot override the segment. */
26768 if (alg == rep_prefix_1_byte
26769 || alg == rep_prefix_4_byte
26770 || alg == rep_prefix_8_byte)
26771 {
26772 if (have_as)
26773 return false;
26774 if (fixed_regs[CX_REG]
26775 || fixed_regs[DI_REG]
26776 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
26777 return false;
26778 }
26779 return true;
26780 }
26781
26782 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
26783 static enum stringop_alg
26784 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
26785 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
26786 bool memset, bool zero_memset, bool have_as,
26787 int *dynamic_check, bool *noalign, bool recur)
26788 {
26789 const struct stringop_algs *algs;
26790 bool optimize_for_speed;
26791 int max = 0;
26792 const struct processor_costs *cost;
26793 int i;
26794 bool any_alg_usable_p = false;
26795
26796 *noalign = false;
26797 *dynamic_check = -1;
26798
26799 /* Even if the string operation call is cold, we still might spend a lot
26800 of time processing large blocks. */
26801 if (optimize_function_for_size_p (cfun)
26802 || (optimize_insn_for_size_p ()
26803 && (max_size < 256
26804 || (expected_size != -1 && expected_size < 256))))
26805 optimize_for_speed = false;
26806 else
26807 optimize_for_speed = true;
26808
26809 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
26810 if (memset)
26811 algs = &cost->memset[TARGET_64BIT != 0];
26812 else
26813 algs = &cost->memcpy[TARGET_64BIT != 0];
26814
26815 /* See maximal size for user defined algorithm. */
26816 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26817 {
26818 enum stringop_alg candidate = algs->size[i].alg;
26819 bool usable = alg_usable_p (candidate, memset, have_as);
26820 any_alg_usable_p |= usable;
26821
26822 if (candidate != libcall && candidate && usable)
26823 max = algs->size[i].max;
26824 }
26825
26826 /* If the expected size is not known but the max size is small enough
26827 that the inline version is a win, set the expected size into
26828 the range. */
26829 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26830 && expected_size == -1)
26831 expected_size = min_size / 2 + max_size / 2;
26832
26833 /* If user specified the algorithm, honor it if possible. */
26834 if (ix86_stringop_alg != no_stringop
26835 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26836 return ix86_stringop_alg;
26837 /* rep; movq or rep; movl is the smallest variant. */
26838 else if (!optimize_for_speed)
26839 {
26840 *noalign = true;
26841 if (!count || (count & 3) || (memset && !zero_memset))
26842 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26843 ? rep_prefix_1_byte : loop_1_byte;
26844 else
26845 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26846 ? rep_prefix_4_byte : loop;
26847 }
26848 /* Very tiny blocks are best handled via the loop; REP is expensive to
26849 set up. */
26850 else if (expected_size != -1 && expected_size < 4)
26851 return loop_1_byte;
26852 else if (expected_size != -1)
26853 {
26854 enum stringop_alg alg = libcall;
26855 bool alg_noalign = false;
26856 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26857 {
26858 /* We get here if the algorithms that were not libcall-based
26859 were rep-prefix based and we are unable to use rep prefixes
26860 based on global register usage. Break out of the loop and
26861 use the heuristic below. */
26862 if (algs->size[i].max == 0)
26863 break;
26864 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26865 {
26866 enum stringop_alg candidate = algs->size[i].alg;
26867
26868 if (candidate != libcall
26869 && alg_usable_p (candidate, memset, have_as))
26870 {
26871 alg = candidate;
26872 alg_noalign = algs->size[i].noalign;
26873 }
26874 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26875 last non-libcall inline algorithm. */
26876 if (TARGET_INLINE_ALL_STRINGOPS)
26877 {
26878 /* When the current size is best to be copied by a libcall,
26879 but we are still forced to inline, run the heuristic below
26880 that will pick code for medium sized blocks. */
26881 if (alg != libcall)
26882 {
26883 *noalign = alg_noalign;
26884 return alg;
26885 }
26886 else if (!any_alg_usable_p)
26887 break;
26888 }
26889 else if (alg_usable_p (candidate, memset, have_as))
26890 {
26891 *noalign = algs->size[i].noalign;
26892 return candidate;
26893 }
26894 }
26895 }
26896 }
26897 /* When asked to inline the call anyway, try to pick a meaningful choice.
26898 We look for the maximal size of block that is faster to copy by hand and
26899 take blocks of at most that size, guessing that the average size will
26900 be roughly half of the block.
26901
26902 If this turns out to be bad, we might simply specify the preferred
26903 choice in ix86_costs. */
26904 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26905 && (algs->unknown_size == libcall
26906 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26907 {
26908 enum stringop_alg alg;
26909 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
26910
26911 /* If there aren't any usable algorithms or if recursing already,
26912 then recursing on smaller sizes or same size isn't going to
26913 find anything. Just return the simple byte-at-a-time copy loop. */
26914 if (!any_alg_usable_p || recur)
26915 {
26916 /* Pick something reasonable. */
26917 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
26918 *dynamic_check = 128;
26919 return loop_1_byte;
26920 }
26921 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
26922 zero_memset, have_as, dynamic_check, noalign, true);
26923 gcc_assert (*dynamic_check == -1);
26924 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26925 *dynamic_check = max;
26926 else
26927 gcc_assert (alg != libcall);
26928 return alg;
26929 }
26930 return (alg_usable_p (algs->unknown_size, memset, have_as)
26931 ? algs->unknown_size : libcall);
26932 }
26933
26934 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26935 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26936 static int
26937 decide_alignment (int align,
26938 enum stringop_alg alg,
26939 int expected_size,
26940 machine_mode move_mode)
26941 {
26942 int desired_align = 0;
26943
26944 gcc_assert (alg != no_stringop);
26945
26946 if (alg == libcall)
26947 return 0;
26948 if (move_mode == VOIDmode)
26949 return 0;
26950
26951 desired_align = GET_MODE_SIZE (move_mode);
26952 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
26953 copying a whole cache line at once. */
26954 if (TARGET_PENTIUMPRO
26955 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
26956 desired_align = 8;
26957
26958 if (optimize_size)
26959 desired_align = 1;
26960 if (desired_align < align)
26961 desired_align = align;
26962 if (expected_size != -1 && expected_size < 4)
26963 desired_align = align;
26964
26965 return desired_align;
26966 }
26967
26968
26969 /* Helper function for memset. For a QImode value 0xXY produce
26970 0xXYXYXYXY of the width specified by MODE. This is essentially
26971 a * 0x01010101, but we can do slightly better than
26972 synth_mult by unwinding the sequence by hand on CPUs with
26973 a slow multiply. */
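/* A worked example (illustrative values): VAL == 0x5A promoted to SImode is
   0x5A5A5A5A. For a constant this is computed directly:

     v = 0x5a;  v |= v << 8;  v |= v << 16;    (now 0x5a5a5a5a)

   while for a register operand the same value is built with shift/ior pairs
   (or with a multiply by the promoted constant 1, i.e. 0x01010101, when the
   multiply is cheaper on the target).  */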
26974 static rtx
26975 promote_duplicated_reg (machine_mode mode, rtx val)
26976 {
26977 machine_mode valmode = GET_MODE (val);
26978 rtx tmp;
26979 int nops = mode == DImode ? 3 : 2;
26980
26981 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26982 if (val == const0_rtx)
26983 return copy_to_mode_reg (mode, CONST0_RTX (mode));
26984 if (CONST_INT_P (val))
26985 {
26986 HOST_WIDE_INT v = INTVAL (val) & 255;
26987
26988 v |= v << 8;
26989 v |= v << 16;
26990 if (mode == DImode)
26991 v |= (v << 16) << 16;
26992 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26993 }
26994
26995 if (valmode == VOIDmode)
26996 valmode = QImode;
26997 if (valmode != QImode)
26998 val = gen_lowpart (QImode, val);
26999 if (mode == QImode)
27000 return val;
27001 if (!TARGET_PARTIAL_REG_STALL)
27002 nops--;
27003 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
27004 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
27005 <= (ix86_cost->shift_const + ix86_cost->add) * nops
27006 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
27007 {
27008 rtx reg = convert_modes (mode, QImode, val, true);
27009 tmp = promote_duplicated_reg (mode, const1_rtx);
27010 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
27011 OPTAB_DIRECT);
27012 }
27013 else
27014 {
27015 rtx reg = convert_modes (mode, QImode, val, true);
27016
27017 if (!TARGET_PARTIAL_REG_STALL)
27018 if (mode == SImode)
27019 emit_insn (gen_insvsi_1 (reg, reg));
27020 else
27021 emit_insn (gen_insvdi_1 (reg, reg));
27022 else
27023 {
27024 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
27025 NULL, 1, OPTAB_DIRECT);
27026 reg =
27027 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27028 }
27029 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
27030 NULL, 1, OPTAB_DIRECT);
27031 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27032 if (mode == SImode)
27033 return reg;
27034 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
27035 NULL, 1, OPTAB_DIRECT);
27036 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27037 return reg;
27038 }
27039 }
27040
27041 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
27042 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
27043 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
27044 static rtx
27045 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
27046 int align)
27047 {
27048 rtx promoted_val;
27049
27050 if (TARGET_64BIT
27051 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
27052 promoted_val = promote_duplicated_reg (DImode, val);
27053 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
27054 promoted_val = promote_duplicated_reg (SImode, val);
27055 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
27056 promoted_val = promote_duplicated_reg (HImode, val);
27057 else
27058 promoted_val = val;
27059
27060 return promoted_val;
27061 }
27062
27063 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
27064 operations when profitable. The code depends upon architecture, block size
27065 and alignment, but always has one of the following overall structures:
27066
27067 Aligned move sequence:
27068
27069 1) Prologue guard: Conditional that jumps up to the epilogue for small
27070 blocks that can be handled by the epilogue alone. This is faster
27071 but also needed for correctness, since the prologue assumes the block
27072 is larger than the desired alignment.
27073
27074 Optional dynamic check for size and libcall for large
27075 blocks is emitted here too, with -minline-stringops-dynamically.
27076
27077 2) Prologue: copy first few bytes in order to get destination
27078 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
27079 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
27080 copied. We emit either a jump tree on power of two sized
27081 blocks, or a byte loop.
27082
27083 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27084 with specified algorithm.
27085
27086 4) Epilogue: code copying tail of the block that is too small to be
27087 handled by main body (or up to size guarded by prologue guard).
27088
27089 Misaligned move sequence
27090
27091 1) Misaligned move prologue/epilogue containing:
27092 a) Prologue handling small memory blocks and jumping to done_label
27093 (skipped if blocks are known to be large enough)
27094 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
27095 needed by single possibly misaligned move
27096 (skipped if alignment is not needed)
27097 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
27098
27099 2) Zero size guard dispatching to done_label, if needed
27100
27101 3) Dispatch to a library call, if needed.
27102
27103 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27104 with the specified algorithm. */
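/* As a worked illustration (assumed figures): a 256-byte memcpy with
   ALIGN == 1 using the unrolled_loop algorithm on a 64-bit target has
   move_mode == DImode and unroll_factor == 4, hence SIZE_NEEDED == 32 and
   DESIRED_ALIGN == 8; the prologue copies at most 7 bytes to align the
   destination, the main loop moves 32 bytes per iteration, and the epilogue
   handles the remaining tail of fewer than 32 bytes.  */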
27105 bool
27106 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
27107 rtx align_exp, rtx expected_align_exp,
27108 rtx expected_size_exp, rtx min_size_exp,
27109 rtx max_size_exp, rtx probable_max_size_exp,
27110 bool issetmem)
27111 {
27112 rtx destreg;
27113 rtx srcreg = NULL;
27114 rtx_code_label *label = NULL;
27115 rtx tmp;
27116 rtx_code_label *jump_around_label = NULL;
27117 HOST_WIDE_INT align = 1;
27118 unsigned HOST_WIDE_INT count = 0;
27119 HOST_WIDE_INT expected_size = -1;
27120 int size_needed = 0, epilogue_size_needed;
27121 int desired_align = 0, align_bytes = 0;
27122 enum stringop_alg alg;
27123 rtx promoted_val = NULL;
27124 rtx vec_promoted_val = NULL;
27125 bool force_loopy_epilogue = false;
27126 int dynamic_check;
27127 bool need_zero_guard = false;
27128 bool noalign;
27129 machine_mode move_mode = VOIDmode;
27130 machine_mode wider_mode;
27131 int unroll_factor = 1;
27132 /* TODO: Once value ranges are available, fill in proper data. */
27133 unsigned HOST_WIDE_INT min_size = 0;
27134 unsigned HOST_WIDE_INT max_size = -1;
27135 unsigned HOST_WIDE_INT probable_max_size = -1;
27136 bool misaligned_prologue_used = false;
27137 bool have_as;
27138
27139 if (CONST_INT_P (align_exp))
27140 align = INTVAL (align_exp);
27141 /* i386 can do misaligned access at reasonably increased cost. */
27142 if (CONST_INT_P (expected_align_exp)
27143 && INTVAL (expected_align_exp) > align)
27144 align = INTVAL (expected_align_exp);
27145 /* ALIGN is the minimum of destination and source alignment, but we care here
27146 just about destination alignment. */
27147 else if (!issetmem
27148 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
27149 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
27150
27151 if (CONST_INT_P (count_exp))
27152 {
27153 min_size = max_size = probable_max_size = count = expected_size
27154 = INTVAL (count_exp);
27155 /* When COUNT is 0, there is nothing to do. */
27156 if (!count)
27157 return true;
27158 }
27159 else
27160 {
27161 if (min_size_exp)
27162 min_size = INTVAL (min_size_exp);
27163 if (max_size_exp)
27164 max_size = INTVAL (max_size_exp);
27165 if (probable_max_size_exp)
27166 probable_max_size = INTVAL (probable_max_size_exp);
27167 if (CONST_INT_P (expected_size_exp))
27168 expected_size = INTVAL (expected_size_exp);
27169 }
27170
27171 /* Make sure we don't need to care about overflow later on. */
27172 if (count > (HOST_WIDE_INT_1U << 30))
27173 return false;
27174
27175 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
27176 if (!issetmem)
27177 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
27178
27179 /* Step 0: Decide on preferred algorithm, desired alignment and
27180 size of chunks to be copied by main loop. */
27181 alg = decide_alg (count, expected_size, min_size, probable_max_size,
27182 issetmem,
27183 issetmem && val_exp == const0_rtx, have_as,
27184 &dynamic_check, &noalign, false);
27185 if (alg == libcall)
27186 return false;
27187 gcc_assert (alg != no_stringop);
27188
27189 /* For now the vector version of memset is generated only for memory zeroing,
27190 as creating the promoted vector value is very cheap in this case. */
27191 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
27192 alg = unrolled_loop;
27193
27194 if (!count)
27195 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
27196 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
27197 if (!issetmem)
27198 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
27199
27200 unroll_factor = 1;
27201 move_mode = word_mode;
27202 switch (alg)
27203 {
27204 case libcall:
27205 case no_stringop:
27206 case last_alg:
27207 gcc_unreachable ();
27208 case loop_1_byte:
27209 need_zero_guard = true;
27210 move_mode = QImode;
27211 break;
27212 case loop:
27213 need_zero_guard = true;
27214 break;
27215 case unrolled_loop:
27216 need_zero_guard = true;
27217 unroll_factor = (TARGET_64BIT ? 4 : 2);
27218 break;
27219 case vector_loop:
27220 need_zero_guard = true;
27221 unroll_factor = 4;
27222 /* Find the widest supported mode. */
27223 move_mode = word_mode;
27224 while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
27225 && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
27226 move_mode = wider_mode;
27227
27228 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
27229 move_mode = TImode;
27230
27231 /* Find the corresponding vector mode with the same size as MOVE_MODE.
27232 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
27233 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
27234 {
27235 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
27236 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
27237 || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
27238 move_mode = word_mode;
27239 }
27240 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
27241 break;
27242 case rep_prefix_8_byte:
27243 move_mode = DImode;
27244 break;
27245 case rep_prefix_4_byte:
27246 move_mode = SImode;
27247 break;
27248 case rep_prefix_1_byte:
27249 move_mode = QImode;
27250 break;
27251 }
27252 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
27253 epilogue_size_needed = size_needed;
27254
27255 /* If we are going to emit any library calls conditionally, make sure any
27256 pending stack adjustments happen before the first conditional branch;
27257 otherwise they will be emitted only before the library call and won't
27258 happen on the other branches. */
27259 if (dynamic_check != -1)
27260 do_pending_stack_adjust ();
27261
27262 desired_align = decide_alignment (align, alg, expected_size, move_mode);
27263 if (!TARGET_ALIGN_STRINGOPS || noalign)
27264 align = desired_align;
27265
27266 /* Step 1: Prologue guard. */
27267
27268 /* Alignment code needs count to be in register. */
27269 if (CONST_INT_P (count_exp) && desired_align > align)
27270 {
27271 if (INTVAL (count_exp) > desired_align
27272 && INTVAL (count_exp) > size_needed)
27273 {
27274 align_bytes
27275 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
27276 if (align_bytes <= 0)
27277 align_bytes = 0;
27278 else
27279 align_bytes = desired_align - align_bytes;
27280 }
27281 if (align_bytes == 0)
27282 count_exp = force_reg (counter_mode (count_exp), count_exp);
27283 }
27284 gcc_assert (desired_align >= 1 && align >= 1);
27285
27286 /* Misaligned move sequences handle both prologue and epilogue at once.
27287 Default code generation results in smaller code for large alignments
27288 and also avoids redundant work when sizes are known precisely. */
27289 misaligned_prologue_used
27290 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
27291 && MAX (desired_align, epilogue_size_needed) <= 32
27292 && desired_align <= epilogue_size_needed
27293 && ((desired_align > align && !align_bytes)
27294 || (!count && epilogue_size_needed > 1)));
27295
27296 /* Do the cheap promotion to allow better CSE across the
27297 main loop and epilogue (i.e. one load of the big constant in
27298 front of all the code).
27299 For now the misaligned move sequences do not have a fast path
27300 without broadcasting. */
27301 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
27302 {
27303 if (alg == vector_loop)
27304 {
27305 gcc_assert (val_exp == const0_rtx);
27306 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
27307 promoted_val = promote_duplicated_reg_to_size (val_exp,
27308 GET_MODE_SIZE (word_mode),
27309 desired_align, align);
27310 }
27311 else
27312 {
27313 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27314 desired_align, align);
27315 }
27316 }
27317 /* Misaligned move sequences handle both prologue and epilogue at once.
27318 Default code generation results in smaller code for large alignments and
27319 also avoids redundant work when sizes are known precisely. */
27320 if (misaligned_prologue_used)
27321 {
27322 /* The misaligned move prologue handles small blocks by itself. */
27323 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
27324 (dst, src, &destreg, &srcreg,
27325 move_mode, promoted_val, vec_promoted_val,
27326 &count_exp,
27327 &jump_around_label,
27328 desired_align < align
27329 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
27330 desired_align, align, &min_size, dynamic_check, issetmem);
27331 if (!issetmem)
27332 src = change_address (src, BLKmode, srcreg);
27333 dst = change_address (dst, BLKmode, destreg);
27334 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27335 epilogue_size_needed = 0;
27336 if (need_zero_guard
27337 && min_size < (unsigned HOST_WIDE_INT) size_needed)
27338 {
27339 /* It is possible that we copied enough so the main loop will not
27340 execute. */
27341 gcc_assert (size_needed > 1);
27342 if (jump_around_label == NULL_RTX)
27343 jump_around_label = gen_label_rtx ();
27344 emit_cmp_and_jump_insns (count_exp,
27345 GEN_INT (size_needed),
27346 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
27347 if (expected_size == -1
27348 || expected_size < (desired_align - align) / 2 + size_needed)
27349 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27350 else
27351 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27352 }
27353 }
27354 /* Ensure that the alignment prologue won't copy past the end of the block. */
27355 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
27356 {
27357 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
27358 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
27359 Make sure it is a power of 2. */
27360 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
27361
27362 /* To improve performance of small blocks, we jump around the VAL
27363 promoting code. This means that if the promoted VAL is not constant,
27364 we might not use it in the epilogue and have to use the byte
27365 loop variant. */
27366 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
27367 force_loopy_epilogue = true;
27368 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27369 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27370 {
27371 /* If main algorithm works on QImode, no epilogue is needed.
27372 For small sizes just don't align anything. */
27373 if (size_needed == 1)
27374 desired_align = align;
27375 else
27376 goto epilogue;
27377 }
27378 else if (!count
27379 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27380 {
27381 label = gen_label_rtx ();
27382 emit_cmp_and_jump_insns (count_exp,
27383 GEN_INT (epilogue_size_needed),
27384 LTU, 0, counter_mode (count_exp), 1, label);
27385 if (expected_size == -1 || expected_size < epilogue_size_needed)
27386 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27387 else
27388 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27389 }
27390 }
27391
27392 /* Emit code to decide at run time whether a library call or inline code
27393 should be used. */
27394 if (dynamic_check != -1)
27395 {
27396 if (!issetmem && CONST_INT_P (count_exp))
27397 {
27398 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
27399 {
27400 emit_block_copy_via_libcall (dst, src, count_exp);
27401 count_exp = const0_rtx;
27402 goto epilogue;
27403 }
27404 }
27405 else
27406 {
27407 rtx_code_label *hot_label = gen_label_rtx ();
27408 if (jump_around_label == NULL_RTX)
27409 jump_around_label = gen_label_rtx ();
27410 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
27411 LEU, 0, counter_mode (count_exp),
27412 1, hot_label);
27413 predict_jump (REG_BR_PROB_BASE * 90 / 100);
27414 if (issetmem)
27415 set_storage_via_libcall (dst, count_exp, val_exp);
27416 else
27417 emit_block_copy_via_libcall (dst, src, count_exp);
27418 emit_jump (jump_around_label);
27419 emit_label (hot_label);
27420 }
27421 }
27422
27423 /* Step 2: Alignment prologue. */
27424 /* Do the expensive promotion once we branched off the small blocks. */
27425 if (issetmem && !promoted_val)
27426 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27427 desired_align, align);
27428
27429 if (desired_align > align && !misaligned_prologue_used)
27430 {
27431 if (align_bytes == 0)
27432 {
27433 /* Except for the first move in the prologue, we no longer know
27434 the constant offset in the aliasing info. It does not seem worth
27435 the pain to maintain it for the first move, so throw away
27436 the info early. */
27437 dst = change_address (dst, BLKmode, destreg);
27438 if (!issetmem)
27439 src = change_address (src, BLKmode, srcreg);
27440 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
27441 promoted_val, vec_promoted_val,
27442 count_exp, align, desired_align,
27443 issetmem);
27444 /* At most desired_align - align bytes are copied. */
27445 if (min_size < (unsigned)(desired_align - align))
27446 min_size = 0;
27447 else
27448 min_size -= desired_align - align;
27449 }
27450 else
27451 {
27452 /* If we know how many bytes need to be stored before dst is
27453 sufficiently aligned, maintain aliasing info accurately. */
27454 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
27455 srcreg,
27456 promoted_val,
27457 vec_promoted_val,
27458 desired_align,
27459 align_bytes,
27460 issetmem);
27461
27462 count_exp = plus_constant (counter_mode (count_exp),
27463 count_exp, -align_bytes);
27464 count -= align_bytes;
27465 min_size -= align_bytes;
27466 max_size -= align_bytes;
27467 }
27468 if (need_zero_guard
27469 && min_size < (unsigned HOST_WIDE_INT) size_needed
27470 && (count < (unsigned HOST_WIDE_INT) size_needed
27471 || (align_bytes == 0
27472 && count < ((unsigned HOST_WIDE_INT) size_needed
27473 + desired_align - align))))
27474 {
27475 /* It is possible that we copied enough that the main loop will not
27476 execute. */
27477 gcc_assert (size_needed > 1);
27478 if (label == NULL_RTX)
27479 label = gen_label_rtx ();
27480 emit_cmp_and_jump_insns (count_exp,
27481 GEN_INT (size_needed),
27482 LTU, 0, counter_mode (count_exp), 1, label);
27483 if (expected_size == -1
27484 || expected_size < (desired_align - align) / 2 + size_needed)
27485 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27486 else
27487 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27488 }
27489 }
27490 if (label && size_needed == 1)
27491 {
27492 emit_label (label);
27493 LABEL_NUSES (label) = 1;
27494 label = NULL;
27495 epilogue_size_needed = 1;
27496 if (issetmem)
27497 promoted_val = val_exp;
27498 }
27499 else if (label == NULL_RTX && !misaligned_prologue_used)
27500 epilogue_size_needed = size_needed;
27501
27502 /* Step 3: Main loop. */
27503
27504 switch (alg)
27505 {
27506 case libcall:
27507 case no_stringop:
27508 case last_alg:
27509 gcc_unreachable ();
27510 case loop_1_byte:
27511 case loop:
27512 case unrolled_loop:
27513 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
27514 count_exp, move_mode, unroll_factor,
27515 expected_size, issetmem);
27516 break;
27517 case vector_loop:
27518 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
27519 vec_promoted_val, count_exp, move_mode,
27520 unroll_factor, expected_size, issetmem);
27521 break;
27522 case rep_prefix_8_byte:
27523 case rep_prefix_4_byte:
27524 case rep_prefix_1_byte:
27525 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
27526 val_exp, count_exp, move_mode, issetmem);
27527 break;
27528 }
27529 /* Properly adjust the offsets of the src and dest memory for aliasing. */
27530 if (CONST_INT_P (count_exp))
27531 {
27532 if (!issetmem)
27533 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
27534 (count / size_needed) * size_needed);
27535 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
27536 (count / size_needed) * size_needed);
27537 }
27538 else
27539 {
27540 if (!issetmem)
27541 src = change_address (src, BLKmode, srcreg);
27542 dst = change_address (dst, BLKmode, destreg);
27543 }
27544
27545 /* Step 4: Epilogue to copy the remaining bytes. */
27546 epilogue:
27547 if (label)
27548 {
27549 /* When the main loop is done, COUNT_EXP might hold the original count,
27550 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
27551 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
27552 bytes. Compensate if needed. */
27553
27554 if (size_needed < epilogue_size_needed)
27555 {
27556 tmp =
27557 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
27558 GEN_INT (size_needed - 1), count_exp, 1,
27559 OPTAB_DIRECT);
27560 if (tmp != count_exp)
27561 emit_move_insn (count_exp, tmp);
27562 }
27563 emit_label (label);
27564 LABEL_NUSES (label) = 1;
27565 }
27566
27567 if (count_exp != const0_rtx && epilogue_size_needed > 1)
27568 {
27569 if (force_loopy_epilogue)
27570 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
27571 epilogue_size_needed);
27572 else
27573 {
27574 if (issetmem)
27575 expand_setmem_epilogue (dst, destreg, promoted_val,
27576 vec_promoted_val, count_exp,
27577 epilogue_size_needed);
27578 else
27579 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
27580 epilogue_size_needed);
27581 }
27582 }
27583 if (jump_around_label)
27584 emit_label (jump_around_label);
27585 return true;
27586 }
27587
27588
27589 /* Expand the appropriate insns for doing strlen if not just doing
27590 repnz; scasb
27591
27592 out = result, initialized with the start address
27593 align_rtx = alignment of the address.
27594 scratch = scratch register, initialized with the start address when
27595 not aligned, otherwise undefined
27596
27597 This is just the body. It needs the initializations mentioned above and
27598 some address computing at the end. These things are done in i386.md. */
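/* In outline, the code emitted below first compares up to three leading
   bytes one at a time until OUT is 4-byte aligned, then scans four bytes
   per iteration using a zero-byte bit test, and finally adjusts OUT so
   that it points at the terminating zero byte. */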
27599
27600 static void
27601 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
27602 {
27603 int align;
27604 rtx tmp;
27605 rtx_code_label *align_2_label = NULL;
27606 rtx_code_label *align_3_label = NULL;
27607 rtx_code_label *align_4_label = gen_label_rtx ();
27608 rtx_code_label *end_0_label = gen_label_rtx ();
27609 rtx mem;
27610 rtx tmpreg = gen_reg_rtx (SImode);
27611 rtx scratch = gen_reg_rtx (SImode);
27612 rtx cmp;
27613
27614 align = 0;
27615 if (CONST_INT_P (align_rtx))
27616 align = INTVAL (align_rtx);
27617
27618 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
27619
27620 /* Is there a known alignment and is it less than 4? */
27621 if (align < 4)
27622 {
27623 rtx scratch1 = gen_reg_rtx (Pmode);
27624 emit_move_insn (scratch1, out);
27625 /* Is there a known alignment and is it not 2? */
27626 if (align != 2)
27627 {
27628 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
27629 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
27630
27631 /* Leave just the 3 lower bits. */
27632 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
27633 NULL_RTX, 0, OPTAB_WIDEN);
27634
27635 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27636 Pmode, 1, align_4_label);
27637 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
27638 Pmode, 1, align_2_label);
27639 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
27640 Pmode, 1, align_3_label);
27641 }
27642 else
27643 {
27644 /* Since the alignment is 2, we have to check 2 or 0 bytes;
27645 check whether the pointer is aligned to a 4-byte boundary. */
27646
27647 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
27648 NULL_RTX, 0, OPTAB_WIDEN);
27649
27650 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27651 Pmode, 1, align_4_label);
27652 }
27653
27654 mem = change_address (src, QImode, out);
27655
27656 /* Now compare the bytes. */
27657
27658 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
27659 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
27660 QImode, 1, end_0_label);
27661
27662 /* Increment the address. */
27663 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27664
27665 /* Not needed with an alignment of 2 */
27666 if (align != 2)
27667 {
27668 emit_label (align_2_label);
27669
27670 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27671 end_0_label);
27672
27673 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27674
27675 emit_label (align_3_label);
27676 }
27677
27678 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27679 end_0_label);
27680
27681 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27682 }
27683
27684 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
27685 align this loop; that only makes programs bigger and does not help to
27686 speed them up. */
27687 emit_label (align_4_label);
27688
27689 mem = change_address (src, SImode, out);
27690 emit_move_insn (scratch, mem);
27691 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
27692
27693 /* This formula yields a nonzero result iff one of the bytes is zero.
27694 This saves three branches inside the loop and many cycles. */
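/* For instance, with scratch = 0x11002233 the sequence below computes
   (0x11002233 - 0x01010101) & ~0x11002233 & 0x80808080
   = 0x0fff2132 & 0xeeffddcc & 0x80808080 = 0x00800000, which is nonzero
   because scratch contains a zero byte; a value with no zero byte
   yields 0. */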
27695
27696 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
27697 emit_insn (gen_one_cmplsi2 (scratch, scratch));
27698 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
27699 emit_insn (gen_andsi3 (tmpreg, tmpreg,
27700 gen_int_mode (0x80808080, SImode)));
27701 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
27702 align_4_label);
27703
27704 if (TARGET_CMOVE)
27705 {
27706 rtx reg = gen_reg_rtx (SImode);
27707 rtx reg2 = gen_reg_rtx (Pmode);
27708 emit_move_insn (reg, tmpreg);
27709 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
27710
27711 /* If zero is not in the first two bytes, move two bytes forward. */
27712 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27713 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27714 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27715 emit_insn (gen_rtx_SET (tmpreg,
27716 gen_rtx_IF_THEN_ELSE (SImode, tmp,
27717 reg,
27718 tmpreg)));
27719 /* Emit lea manually to avoid clobbering of flags. */
27720 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
27721
27722 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27723 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27724 emit_insn (gen_rtx_SET (out,
27725 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
27726 reg2,
27727 out)));
27728 }
27729 else
27730 {
27731 rtx_code_label *end_2_label = gen_label_rtx ();
27732 /* Is zero in the first two bytes? */
27733
27734 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27735 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27736 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
27737 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
27738 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
27739 pc_rtx);
27740 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
27741 JUMP_LABEL (tmp) = end_2_label;
27742
27743 /* Not in the first two. Move two bytes forward. */
27744 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
27745 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
27746
27747 emit_label (end_2_label);
27748
27749 }
27750
27751 /* Avoid branch in fixing the byte. */
27752 tmpreg = gen_lowpart (QImode, tmpreg);
27753 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
27754 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
27755 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
27756 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
27757
27758 emit_label (end_0_label);
27759 }
27760
27761 /* Expand strlen. */
27762
27763 bool
27764 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
27765 {
27766 rtx addr, scratch1, scratch2, scratch3, scratch4;
27767
27768 /* The generic case of the strlen expander is long. Avoid expanding it
27769 unless TARGET_INLINE_ALL_STRINGOPS. */
27770
27771 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27772 && !TARGET_INLINE_ALL_STRINGOPS
27773 && !optimize_insn_for_size_p ()
27774 && (!CONST_INT_P (align) || INTVAL (align) < 4))
27775 return false;
27776
27777 addr = force_reg (Pmode, XEXP (src, 0));
27778 scratch1 = gen_reg_rtx (Pmode);
27779
27780 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27781 && !optimize_insn_for_size_p ())
27782 {
27783 /* Well, it seems that some optimizer does not combine a call like
27784 foo(strlen(bar), strlen(bar));
27785 when the move and the subtraction are done here. It does calculate
27786 the length just once when these instructions are done inside
27787 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
27788 often used and I use one fewer register for the lifetime of
27789 output_strlen_unroll(), this is better. */
27790
27791 emit_move_insn (out, addr);
27792
27793 ix86_expand_strlensi_unroll_1 (out, src, align);
27794
27795 /* strlensi_unroll_1 returns the address of the zero at the end of
27796 the string, like memchr(), so compute the length by subtracting
27797 the start address. */
27798 emit_insn (ix86_gen_sub3 (out, out, addr));
27799 }
27800 else
27801 {
27802 rtx unspec;
27803
27804 /* Can't use this if the user has appropriated eax, ecx, or edi. */
27805 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
27806 return false;
27807 /* Can't use this for non-default address spaces. */
27808 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
27809 return false;
27810
27811 scratch2 = gen_reg_rtx (Pmode);
27812 scratch3 = gen_reg_rtx (Pmode);
27813 scratch4 = force_reg (Pmode, constm1_rtx);
27814
27815 emit_move_insn (scratch3, addr);
27816 eoschar = force_reg (QImode, eoschar);
27817
27818 src = replace_equiv_address_nv (src, scratch3);
27819
27820 /* If .md starts supporting :P, this can be done in .md. */
27821 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27822 scratch4), UNSPEC_SCAS);
27823 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
27824 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27825 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27826 }
27827 return true;
27828 }
27829
27830 /* For a given symbol (function), construct code to compute the address of its
27831 PLT entry in the large x86-64 PIC model. */
27832 static rtx
27833 construct_plt_address (rtx symbol)
27834 {
27835 rtx tmp, unspec;
27836
27837 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27838 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27839 gcc_assert (Pmode == DImode);
27840
27841 tmp = gen_reg_rtx (Pmode);
27842 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27843
27844 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27845 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
27846 return tmp;
27847 }
27848
27849 rtx
27850 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27851 rtx callarg2,
27852 rtx pop, bool sibcall)
27853 {
27854 rtx vec[3];
27855 rtx use = NULL, call;
27856 unsigned int vec_len = 0;
27857 tree fndecl;
27858
27859 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27860 {
27861 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
27862 if (fndecl
27863 && (lookup_attribute ("interrupt",
27864 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
27865 error ("interrupt service routine can't be called directly");
27866 }
27867 else
27868 fndecl = NULL_TREE;
27869
27870 if (pop == const0_rtx)
27871 pop = NULL;
27872 gcc_assert (!TARGET_64BIT || !pop);
27873
27874 if (TARGET_MACHO && !TARGET_64BIT)
27875 {
27876 #if TARGET_MACHO
27877 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27878 fnaddr = machopic_indirect_call_target (fnaddr);
27879 #endif
27880 }
27881 else
27882 {
27883 /* Static functions and indirect calls don't need the pic register. Also,
27884 check if the PLT was explicitly avoided via -fno-plt or the "noplt" attribute,
27885 making it an indirect call. */
27886 rtx addr = XEXP (fnaddr, 0);
27887 if (flag_pic
27888 && GET_CODE (addr) == SYMBOL_REF
27889 && !SYMBOL_REF_LOCAL_P (addr))
27890 {
27891 if (flag_plt
27892 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27893 || !lookup_attribute ("noplt",
27894 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27895 {
27896 if (!TARGET_64BIT
27897 || (ix86_cmodel == CM_LARGE_PIC
27898 && DEFAULT_ABI != MS_ABI))
27899 {
27900 use_reg (&use, gen_rtx_REG (Pmode,
27901 REAL_PIC_OFFSET_TABLE_REGNUM));
27902 if (ix86_use_pseudo_pic_reg ())
27903 emit_move_insn (gen_rtx_REG (Pmode,
27904 REAL_PIC_OFFSET_TABLE_REGNUM),
27905 pic_offset_table_rtx);
27906 }
27907 }
27908 else if (!TARGET_PECOFF && !TARGET_MACHO)
27909 {
27910 if (TARGET_64BIT)
27911 {
27912 fnaddr = gen_rtx_UNSPEC (Pmode,
27913 gen_rtvec (1, addr),
27914 UNSPEC_GOTPCREL);
27915 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27916 }
27917 else
27918 {
27919 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27920 UNSPEC_GOT);
27921 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27922 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27923 fnaddr);
27924 }
27925 fnaddr = gen_const_mem (Pmode, fnaddr);
27926 /* Pmode may not be the same as word_mode for x32, which
27927 doesn't support indirect branch via 32-bit memory slot.
27928 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27929 indirect branch via x32 GOT slot is OK. */
27930 if (GET_MODE (fnaddr) != word_mode)
27931 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27932 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27933 }
27934 }
27935 }
27936
27937 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27938 parameters passed in vector registers. */
27939 if (TARGET_64BIT
27940 && (INTVAL (callarg2) > 0
27941 || (INTVAL (callarg2) == 0
27942 && (TARGET_SSE || !flag_skip_rax_setup))))
27943 {
27944 rtx al = gen_rtx_REG (QImode, AX_REG);
27945 emit_move_insn (al, callarg2);
27946 use_reg (&use, al);
27947 }
27948
27949 if (ix86_cmodel == CM_LARGE_PIC
27950 && !TARGET_PECOFF
27951 && MEM_P (fnaddr)
27952 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27953 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27954 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27955 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27956 branch via x32 GOT slot is OK. */
27957 else if (!(TARGET_X32
27958 && MEM_P (fnaddr)
27959 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27960 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27961 && (sibcall
27962 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27963 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
27964 {
27965 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27966 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27967 }
27968
27969 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27970
27971 if (retval)
27972 {
27973 /* We should add the bound registers as destinations in case
27974 a pointer with bounds may be returned. */
27975 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27976 {
27977 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27978 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27979 if (GET_CODE (retval) == PARALLEL)
27980 {
27981 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27982 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27983 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27984 retval = chkp_join_splitted_slot (retval, par);
27985 }
27986 else
27987 {
27988 retval = gen_rtx_PARALLEL (VOIDmode,
27989 gen_rtvec (3, retval, b0, b1));
27990 chkp_put_regs_to_expr_list (retval);
27991 }
27992 }
27993
27994 call = gen_rtx_SET (retval, call);
27995 }
27996 vec[vec_len++] = call;
27997
27998 if (pop)
27999 {
28000 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
28001 pop = gen_rtx_SET (stack_pointer_rtx, pop);
28002 vec[vec_len++] = pop;
28003 }
28004
28005 if (cfun->machine->no_caller_saved_registers
28006 && (!fndecl
28007 || (!TREE_THIS_VOLATILE (fndecl)
28008 && !lookup_attribute ("no_caller_saved_registers",
28009 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
28010 {
28011 static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
28012 bool is_64bit_ms_abi = (TARGET_64BIT
28013 && ix86_function_abi (fndecl) == MS_ABI);
28014 char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
28015
28016 /* If there are no caller-saved registers, add all registers
28017 that are clobbered by the call which returns. */
28018 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28019 if (!fixed_regs[i]
28020 && (ix86_call_used_regs[i] == 1
28021 || (ix86_call_used_regs[i] & c_mask))
28022 && !STACK_REGNO_P (i)
28023 && !MMX_REGNO_P (i))
28024 clobber_reg (&use,
28025 gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
28026 }
28027 else if (TARGET_64BIT_MS_ABI
28028 && (!callarg2 || INTVAL (callarg2) != -2))
28029 {
28030 unsigned i;
28031
28032 for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
28033 {
28034 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
28035 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
28036
28037 clobber_reg (&use, gen_rtx_REG (mode, regno));
28038 }
28039
28040 /* Set here, but it may get cleared later. */
28041 if (TARGET_CALL_MS2SYSV_XLOGUES)
28042 {
28043 if (!TARGET_SSE)
28044 ;
28045
28046 /* Don't break hot-patched functions. */
28047 else if (ix86_function_ms_hook_prologue (current_function_decl))
28048 ;
28049
28050 /* TODO: Cases not yet examined. */
28051 else if (flag_split_stack)
28052 warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
28053
28054 else
28055 {
28056 gcc_assert (!reload_completed);
28057 cfun->machine->call_ms2sysv = true;
28058 }
28059 }
28060 }
28061
28062 if (vec_len > 1)
28063 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
28064 call = emit_call_insn (call);
28065 if (use)
28066 CALL_INSN_FUNCTION_USAGE (call) = use;
28067
28068 return call;
28069 }
28070
28071 /* Return true if the function being called was marked with attribute
28072 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
28073 to handle the non-PIC case in the backend because there is no easy
28074 interface for the front-end to force non-PLT calls to use the GOT.
28075 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
28076 to call the function marked "noplt" indirectly. */
28077
28078 static bool
28079 ix86_nopic_noplt_attribute_p (rtx call_op)
28080 {
28081 if (flag_pic || ix86_cmodel == CM_LARGE
28082 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
28083 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
28084 || SYMBOL_REF_LOCAL_P (call_op))
28085 return false;
28086
28087 tree symbol_decl = SYMBOL_REF_DECL (call_op);
28088
28089 if (!flag_plt
28090 || (symbol_decl != NULL_TREE
28091 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
28092 return true;
28093
28094 return false;
28095 }
28096
28097 /* Output the assembly for a call instruction. */
28098
28099 const char *
28100 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
28101 {
28102 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
28103 bool seh_nop_p = false;
28104 const char *xasm;
28105
28106 if (SIBLING_CALL_P (insn))
28107 {
28108 if (direct_p)
28109 {
28110 if (ix86_nopic_noplt_attribute_p (call_op))
28111 {
28112 if (TARGET_64BIT)
28113 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28114 else
28115 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28116 }
28117 else
28118 xasm = "%!jmp\t%P0";
28119 }
28120 /* SEH epilogue detection requires the indirect branch case
28121 to include REX.W. */
28122 else if (TARGET_SEH)
28123 xasm = "%!rex.W jmp\t%A0";
28124 else
28125 xasm = "%!jmp\t%A0";
28126
28127 output_asm_insn (xasm, &call_op);
28128 return "";
28129 }
28130
28131 /* SEH unwinding can require an extra nop to be emitted in several
28132 circumstances. Determine if we have one of those. */
28133 if (TARGET_SEH)
28134 {
28135 rtx_insn *i;
28136
28137 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
28138 {
28139 /* If we get to another real insn, we don't need the nop. */
28140 if (INSN_P (i))
28141 break;
28142
28143 /* If we get to the epilogue note, prevent a catch region from
28144 being adjacent to the standard epilogue sequence. If non-
28145 call-exceptions, we'll have done this during epilogue emission. */
28146 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
28147 && !flag_non_call_exceptions
28148 && !can_throw_internal (insn))
28149 {
28150 seh_nop_p = true;
28151 break;
28152 }
28153 }
28154
28155 /* If we didn't find a real insn following the call, prevent the
28156 unwinder from looking into the next function. */
28157 if (i == NULL)
28158 seh_nop_p = true;
28159 }
28160
28161 if (direct_p)
28162 {
28163 if (ix86_nopic_noplt_attribute_p (call_op))
28164 {
28165 if (TARGET_64BIT)
28166 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28167 else
28168 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28169 }
28170 else
28171 xasm = "%!call\t%P0";
28172 }
28173 else
28174 xasm = "%!call\t%A0";
28175
28176 output_asm_insn (xasm, &call_op);
28177
28178 if (seh_nop_p)
28179 return "nop";
28180
28181 return "";
28182 }
28183 \f
28184 /* Clear stack slot assignments remembered from previous functions.
28185 This is called from INIT_EXPANDERS once before RTL is emitted for each
28186 function. */
28187
28188 static struct machine_function *
28189 ix86_init_machine_status (void)
28190 {
28191 struct machine_function *f;
28192
28193 f = ggc_cleared_alloc<machine_function> ();
28194 f->call_abi = ix86_abi;
28195
28196 return f;
28197 }
28198
28199 /* Return a MEM corresponding to a stack slot with mode MODE.
28200 Allocate a new slot if necessary.
28201
28202 The RTL for a function can have several slots available: N is
28203 which slot to use. */
28204
28205 rtx
28206 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
28207 {
28208 struct stack_local_entry *s;
28209
28210 gcc_assert (n < MAX_386_STACK_LOCALS);
28211
28212 for (s = ix86_stack_locals; s; s = s->next)
28213 if (s->mode == mode && s->n == n)
28214 return validize_mem (copy_rtx (s->rtl));
28215
28216 s = ggc_alloc<stack_local_entry> ();
28217 s->n = n;
28218 s->mode = mode;
28219 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
28220
28221 s->next = ix86_stack_locals;
28222 ix86_stack_locals = s;
28223 return validize_mem (copy_rtx (s->rtl));
28224 }
28225
28226 static void
28227 ix86_instantiate_decls (void)
28228 {
28229 struct stack_local_entry *s;
28230
28231 for (s = ix86_stack_locals; s; s = s->next)
28232 if (s->rtl != NULL_RTX)
28233 instantiate_decl_rtl (s->rtl);
28234 }
28235 \f
28236 /* Return the number used for encoding REG, in the range 0..7. */
28237
28238 static int
28239 reg_encoded_number (rtx reg)
28240 {
28241 unsigned regno = REGNO (reg);
28242 switch (regno)
28243 {
28244 case AX_REG:
28245 return 0;
28246 case CX_REG:
28247 return 1;
28248 case DX_REG:
28249 return 2;
28250 case BX_REG:
28251 return 3;
28252 case SP_REG:
28253 return 4;
28254 case BP_REG:
28255 return 5;
28256 case SI_REG:
28257 return 6;
28258 case DI_REG:
28259 return 7;
28260 default:
28261 break;
28262 }
28263 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
28264 return regno - FIRST_STACK_REG;
28265 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
28266 return regno - FIRST_SSE_REG;
28267 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
28268 return regno - FIRST_MMX_REG;
28269 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
28270 return regno - FIRST_REX_SSE_REG;
28271 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
28272 return regno - FIRST_REX_INT_REG;
28273 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
28274 return regno - FIRST_MASK_REG;
28275 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
28276 return regno - FIRST_BND_REG;
28277 return -1;
28278 }
28279
28280 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
28281 in its encoding if it could be relevant for ROP mitigation, otherwise
28282 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
28283 used for calculating it into them. */
28284
28285 static int
28286 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
28287 int *popno0 = 0, int *popno1 = 0)
28288 {
28289 if (asm_noperands (PATTERN (insn)) >= 0)
28290 return -1;
28291 int has_modrm = get_attr_modrm (insn);
28292 if (!has_modrm)
28293 return -1;
28294 enum attr_modrm_class cls = get_attr_modrm_class (insn);
28295 rtx op0, op1;
28296 switch (cls)
28297 {
28298 case MODRM_CLASS_OP02:
28299 gcc_assert (noperands >= 3);
28300 if (popno0)
28301 {
28302 *popno0 = 0;
28303 *popno1 = 2;
28304 }
28305 op0 = operands[0];
28306 op1 = operands[2];
28307 break;
28308 case MODRM_CLASS_OP01:
28309 gcc_assert (noperands >= 2);
28310 if (popno0)
28311 {
28312 *popno0 = 0;
28313 *popno1 = 1;
28314 }
28315 op0 = operands[0];
28316 op1 = operands[1];
28317 break;
28318 default:
28319 return -1;
28320 }
28321 if (REG_P (op0) && REG_P (op1))
28322 {
28323 int enc0 = reg_encoded_number (op0);
28324 int enc1 = reg_encoded_number (op1);
28325 return 0xc0 + (enc1 << 3) + enc0;
28326 }
28327 return -1;
28328 }
28329
28330 /* Check whether x86 address PARTS is a pc-relative address. */
28331
28332 bool
28333 ix86_rip_relative_addr_p (struct ix86_address *parts)
28334 {
28335 rtx base, index, disp;
28336
28337 base = parts->base;
28338 index = parts->index;
28339 disp = parts->disp;
28340
28341 if (disp && !base && !index)
28342 {
28343 if (TARGET_64BIT)
28344 {
28345 rtx symbol = disp;
28346
28347 if (GET_CODE (disp) == CONST)
28348 symbol = XEXP (disp, 0);
28349 if (GET_CODE (symbol) == PLUS
28350 && CONST_INT_P (XEXP (symbol, 1)))
28351 symbol = XEXP (symbol, 0);
28352
28353 if (GET_CODE (symbol) == LABEL_REF
28354 || (GET_CODE (symbol) == SYMBOL_REF
28355 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
28356 || (GET_CODE (symbol) == UNSPEC
28357 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
28358 || XINT (symbol, 1) == UNSPEC_PCREL
28359 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
28360 return true;
28361 }
28362 }
28363 return false;
28364 }
28365
28366 /* Calculate the length of the memory address in the instruction encoding.
28367 This includes the addr32 prefix but not the one-byte modrm, opcode,
28368 or other prefixes. We never generate an addr32 prefix for the LEA insn. */
28369
28370 int
28371 memory_address_length (rtx addr, bool lea)
28372 {
28373 struct ix86_address parts;
28374 rtx base, index, disp;
28375 int len;
28376 int ok;
28377
28378 if (GET_CODE (addr) == PRE_DEC
28379 || GET_CODE (addr) == POST_INC
28380 || GET_CODE (addr) == PRE_MODIFY
28381 || GET_CODE (addr) == POST_MODIFY)
28382 return 0;
28383
28384 ok = ix86_decompose_address (addr, &parts);
28385 gcc_assert (ok);
28386
28387 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
28388
28389 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
28390 if (TARGET_64BIT && !lea
28391 && (SImode_address_operand (addr, VOIDmode)
28392 || (parts.base && GET_MODE (parts.base) == SImode)
28393 || (parts.index && GET_MODE (parts.index) == SImode)))
28394 len++;
28395
28396 base = parts.base;
28397 index = parts.index;
28398 disp = parts.disp;
28399
28400 if (base && SUBREG_P (base))
28401 base = SUBREG_REG (base);
28402 if (index && SUBREG_P (index))
28403 index = SUBREG_REG (index);
28404
28405 gcc_assert (base == NULL_RTX || REG_P (base));
28406 gcc_assert (index == NULL_RTX || REG_P (index));
28407
28408 /* Rule of thumb:
28409 - esp as the base always wants an index,
28410 - ebp as the base always wants a displacement,
28411 - r12 as the base always wants an index,
28412 - r13 as the base always wants a displacement. */
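/* For example, a plain (%ebp) or (%r13) reference therefore returns 1
   (for the mandatory disp8), a rip-relative disp32 reference returns 4,
   and an absolute disp32 in 64-bit mode returns 5. */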
28413
28414 /* Register Indirect. */
28415 if (base && !index && !disp)
28416 {
28417 /* esp (for its index) and ebp (for its displacement) need
28418 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
28419 code. */
28420 if (base == arg_pointer_rtx
28421 || base == frame_pointer_rtx
28422 || REGNO (base) == SP_REG
28423 || REGNO (base) == BP_REG
28424 || REGNO (base) == R12_REG
28425 || REGNO (base) == R13_REG)
28426 len++;
28427 }
28428
28429 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
28430 is not disp32, but disp32(%rip), so for disp32
28431 a SIB byte is needed, unless print_operand_address
28432 optimizes it into disp32(%rip) or (%rip) is implied
28433 by UNSPEC. */
28434 else if (disp && !base && !index)
28435 {
28436 len += 4;
28437 if (!ix86_rip_relative_addr_p (&parts))
28438 len++;
28439 }
28440 else
28441 {
28442 /* Find the length of the displacement constant. */
28443 if (disp)
28444 {
28445 if (base && satisfies_constraint_K (disp))
28446 len += 1;
28447 else
28448 len += 4;
28449 }
28450 /* ebp always wants a displacement. Similarly r13. */
28451 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
28452 len++;
28453
28454 /* An index requires the two-byte modrm form.... */
28455 if (index
28456 /* ...like esp (or r12), which always wants an index. */
28457 || base == arg_pointer_rtx
28458 || base == frame_pointer_rtx
28459 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
28460 len++;
28461 }
28462
28463 return len;
28464 }
28465
28466 /* Compute default value for "length_immediate" attribute. When SHORTFORM
28467 is set, expect that the insn has an 8-bit immediate alternative. */
28468 int
28469 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
28470 {
28471 int len = 0;
28472 int i;
28473 extract_insn_cached (insn);
28474 for (i = recog_data.n_operands - 1; i >= 0; --i)
28475 if (CONSTANT_P (recog_data.operand[i]))
28476 {
28477 enum attr_mode mode = get_attr_mode (insn);
28478
28479 gcc_assert (!len);
28480 if (shortform && CONST_INT_P (recog_data.operand[i]))
28481 {
28482 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
28483 switch (mode)
28484 {
28485 case MODE_QI:
28486 len = 1;
28487 continue;
28488 case MODE_HI:
28489 ival = trunc_int_for_mode (ival, HImode);
28490 break;
28491 case MODE_SI:
28492 ival = trunc_int_for_mode (ival, SImode);
28493 break;
28494 default:
28495 break;
28496 }
28497 if (IN_RANGE (ival, -128, 127))
28498 {
28499 len = 1;
28500 continue;
28501 }
28502 }
28503 switch (mode)
28504 {
28505 case MODE_QI:
28506 len = 1;
28507 break;
28508 case MODE_HI:
28509 len = 2;
28510 break;
28511 case MODE_SI:
28512 len = 4;
28513 break;
28514 /* Immediates for DImode instructions are encoded
28515 as 32-bit sign-extended values. */
28516 case MODE_DI:
28517 len = 4;
28518 break;
28519 default:
28520 fatal_insn ("unknown insn mode", insn);
28521 }
28522 }
28523 return len;
28524 }
28525
28526 /* Compute default value for "length_address" attribute. */
28527 int
28528 ix86_attr_length_address_default (rtx_insn *insn)
28529 {
28530 int i;
28531
28532 if (get_attr_type (insn) == TYPE_LEA)
28533 {
28534 rtx set = PATTERN (insn), addr;
28535
28536 if (GET_CODE (set) == PARALLEL)
28537 set = XVECEXP (set, 0, 0);
28538
28539 gcc_assert (GET_CODE (set) == SET);
28540
28541 addr = SET_SRC (set);
28542
28543 return memory_address_length (addr, true);
28544 }
28545
28546 extract_insn_cached (insn);
28547 for (i = recog_data.n_operands - 1; i >= 0; --i)
28548 {
28549 rtx op = recog_data.operand[i];
28550 if (MEM_P (op))
28551 {
28552 constrain_operands_cached (insn, reload_completed);
28553 if (which_alternative != -1)
28554 {
28555 const char *constraints = recog_data.constraints[i];
28556 int alt = which_alternative;
28557
28558 while (*constraints == '=' || *constraints == '+')
28559 constraints++;
28560 while (alt-- > 0)
28561 while (*constraints++ != ',')
28562 ;
28563 /* Skip ignored operands. */
28564 if (*constraints == 'X')
28565 continue;
28566 }
28567
28568 int len = memory_address_length (XEXP (op, 0), false);
28569
28570 /* Account for segment prefix for non-default addr spaces. */
28571 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
28572 len++;
28573
28574 return len;
28575 }
28576 }
28577 return 0;
28578 }
28579
28580 /* Compute default value for "length_vex" attribute. It includes
28581 the 2- or 3-byte VEX prefix and 1 opcode byte. */
28582
28583 int
28584 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
28585 bool has_vex_w)
28586 {
28587 int i;
28588
28589 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
28590 requires the 3-byte VEX prefix. */
28591 if (!has_0f_opcode || has_vex_w)
28592 return 3 + 1;
28593
28594 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
28595 if (!TARGET_64BIT)
28596 return 2 + 1;
28597
28598 extract_insn_cached (insn);
28599
28600 for (i = recog_data.n_operands - 1; i >= 0; --i)
28601 if (REG_P (recog_data.operand[i]))
28602 {
28603 /* REX.W bit uses 3 byte VEX prefix. */
28604 if (GET_MODE (recog_data.operand[i]) == DImode
28605 && GENERAL_REG_P (recog_data.operand[i]))
28606 return 3 + 1;
28607 }
28608 else
28609 {
28610 /* REX.X or REX.B bits use 3 byte VEX prefix. */
28611 if (MEM_P (recog_data.operand[i])
28612 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
28613 return 3 + 1;
28614 }
28615
28616 return 2 + 1;
28617 }
28618 \f
28619
28620 static bool
28621 ix86_class_likely_spilled_p (reg_class_t);
28622
28623 /* Return true if the lhs of INSN is a HW function argument register; set
28624 *IS_SPILLED to true if it is a likely-spilled HW register. */
28625 static bool
28626 insn_is_function_arg (rtx insn, bool* is_spilled)
28627 {
28628 rtx dst;
28629
28630 if (!NONDEBUG_INSN_P (insn))
28631 return false;
28632 /* Call instructions are not movable; ignore them. */
28633 if (CALL_P (insn))
28634 return false;
28635 insn = PATTERN (insn);
28636 if (GET_CODE (insn) == PARALLEL)
28637 insn = XVECEXP (insn, 0, 0);
28638 if (GET_CODE (insn) != SET)
28639 return false;
28640 dst = SET_DEST (insn);
28641 if (REG_P (dst) && HARD_REGISTER_P (dst)
28642 && ix86_function_arg_regno_p (REGNO (dst)))
28643 {
28644 /* Is it likely spilled HW register? */
28645 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28646 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28647 *is_spilled = true;
28648 return true;
28649 }
28650 return false;
28651 }
28652
28653 /* Add output dependencies for a chain of adjacent function arguments, but
28654 only if there is a move to a likely-spilled HW register. Return the first
28655 argument if at least one dependence was added, or NULL otherwise. */
28656 static rtx_insn *
28657 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28658 {
28659 rtx_insn *insn;
28660 rtx_insn *last = call;
28661 rtx_insn *first_arg = NULL;
28662 bool is_spilled = false;
28663
28664 head = PREV_INSN (head);
28665
28666 /* Find the argument-passing instruction nearest to the call. */
28667 while (true)
28668 {
28669 last = PREV_INSN (last);
28670 if (last == head)
28671 return NULL;
28672 if (!NONDEBUG_INSN_P (last))
28673 continue;
28674 if (insn_is_function_arg (last, &is_spilled))
28675 break;
28676 return NULL;
28677 }
28678
28679 first_arg = last;
28680 while (true)
28681 {
28682 insn = PREV_INSN (last);
28683 if (!INSN_P (insn))
28684 break;
28685 if (insn == head)
28686 break;
28687 if (!NONDEBUG_INSN_P (insn))
28688 {
28689 last = insn;
28690 continue;
28691 }
28692 if (insn_is_function_arg (insn, &is_spilled))
28693 {
28694 /* Add an output dependence between two function arguments if the chain
28695 of output arguments contains likely-spilled HW registers. */
28696 if (is_spilled)
28697 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28698 first_arg = last = insn;
28699 }
28700 else
28701 break;
28702 }
28703 if (!is_spilled)
28704 return NULL;
28705 return first_arg;
28706 }
28707
28708 /* Add output or anti dependency from insn to first_arg to restrict its code
28709 motion. */
28710 static void
28711 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28712 {
28713 rtx set;
28714 rtx tmp;
28715
28716 /* Add anti dependencies for bounds stores. */
28717 if (INSN_P (insn)
28718 && GET_CODE (PATTERN (insn)) == PARALLEL
28719 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28720 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28721 {
28722 add_dependence (first_arg, insn, REG_DEP_ANTI);
28723 return;
28724 }
28725
28726 set = single_set (insn);
28727 if (!set)
28728 return;
28729 tmp = SET_DEST (set);
28730 if (REG_P (tmp))
28731 {
28732 /* Add output dependency to the first function argument. */
28733 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28734 return;
28735 }
28736 /* Add anti dependency. */
28737 add_dependence (first_arg, insn, REG_DEP_ANTI);
28738 }
28739
28740 /* Avoid cross-block motion of a function argument by adding a dependency
28741 from the first non-jump instruction in BB. */
28742 static void
28743 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28744 {
28745 rtx_insn *insn = BB_END (bb);
28746
28747 while (insn)
28748 {
28749 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28750 {
28751 rtx set = single_set (insn);
28752 if (set)
28753 {
28754 avoid_func_arg_motion (arg, insn);
28755 return;
28756 }
28757 }
28758 if (insn == BB_HEAD (bb))
28759 return;
28760 insn = PREV_INSN (insn);
28761 }
28762 }
28763
28764 /* Hook for pre-reload scheduling - avoid motion of function arguments
28765 passed in likely-spilled HW registers. */
28766 static void
28767 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28768 {
28769 rtx_insn *insn;
28770 rtx_insn *first_arg = NULL;
28771 if (reload_completed)
28772 return;
28773 while (head != tail && DEBUG_INSN_P (head))
28774 head = NEXT_INSN (head);
28775 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28776 if (INSN_P (insn) && CALL_P (insn))
28777 {
28778 first_arg = add_parameter_dependencies (insn, head);
28779 if (first_arg)
28780 {
28781 /* Add a dependee for the first argument to predecessors, but only
28782 if the region contains more than one block. */
28783 basic_block bb = BLOCK_FOR_INSN (insn);
28784 int rgn = CONTAINING_RGN (bb->index);
28785 int nr_blks = RGN_NR_BLOCKS (rgn);
28786 /* Skip trivial regions and region head blocks that can have
28787 predecessors outside of region. */
28788 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28789 {
28790 edge e;
28791 edge_iterator ei;
28792
28793 /* Regions are SCCs with the exception of selective
28794 scheduling with pipelining of outer blocks enabled.
28795 So also check that immediate predecessors of a non-head
28796 block are in the same region. */
28797 FOR_EACH_EDGE (e, ei, bb->preds)
28798 {
28799 /* Avoid creating loop-carried dependencies by using
28800 the topological ordering in the region. */
28801 if (rgn == CONTAINING_RGN (e->src->index)
28802 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28803 add_dependee_for_func_arg (first_arg, e->src);
28804 }
28805 }
28806 insn = first_arg;
28807 if (insn == head)
28808 break;
28809 }
28810 }
28811 else if (first_arg)
28812 avoid_func_arg_motion (first_arg, insn);
28813 }
28814
28815 /* Hook for pre-reload scheduling - set the priority of moves from
28816 likely-spilled HW registers to the maximum, to schedule them as soon as
28817 possible. These are moves from function argument registers at the top of
28818 the function entry and moves from function return value registers after a call. */
28819 static int
28820 ix86_adjust_priority (rtx_insn *insn, int priority)
28821 {
28822 rtx set;
28823
28824 if (reload_completed)
28825 return priority;
28826
28827 if (!NONDEBUG_INSN_P (insn))
28828 return priority;
28829
28830 set = single_set (insn);
28831 if (set)
28832 {
28833 rtx tmp = SET_SRC (set);
28834 if (REG_P (tmp)
28835 && HARD_REGISTER_P (tmp)
28836 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28837 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28838 return current_sched_info->sched_max_insns_priority;
28839 }
28840
28841 return priority;
28842 }
28843
28844 /* Prepare for scheduling pass. */
28845 static void
28846 ix86_sched_init_global (FILE *, int, int)
28847 {
28848 /* Install scheduling hooks for current CPU. Some of these hooks are used
28849 in time-critical parts of the scheduler, so we only set them up when
28850 they are actually used. */
28851 switch (ix86_tune)
28852 {
28853 case PROCESSOR_CORE2:
28854 case PROCESSOR_NEHALEM:
28855 case PROCESSOR_SANDYBRIDGE:
28856 case PROCESSOR_HASWELL:
28857 case PROCESSOR_GENERIC:
28858 /* Do not perform multipass scheduling for pre-reload schedule
28859 to save compile time. */
28860 if (reload_completed)
28861 {
28862 ix86_core2i7_init_hooks ();
28863 break;
28864 }
28865 /* Fall through. */
28866 default:
28867 targetm.sched.dfa_post_advance_cycle = NULL;
28868 targetm.sched.first_cycle_multipass_init = NULL;
28869 targetm.sched.first_cycle_multipass_begin = NULL;
28870 targetm.sched.first_cycle_multipass_issue = NULL;
28871 targetm.sched.first_cycle_multipass_backtrack = NULL;
28872 targetm.sched.first_cycle_multipass_end = NULL;
28873 targetm.sched.first_cycle_multipass_fini = NULL;
28874 break;
28875 }
28876 }
28877
28878 \f
28879 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
28880
28881 static HOST_WIDE_INT
28882 ix86_static_rtx_alignment (machine_mode mode)
28883 {
28884 if (mode == DFmode)
28885 return 64;
28886 if (ALIGN_MODE_128 (mode))
28887 return MAX (128, GET_MODE_ALIGNMENT (mode));
28888 return GET_MODE_ALIGNMENT (mode);
28889 }
28890
28891 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28892
28893 static HOST_WIDE_INT
28894 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28895 {
28896 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
28897 || TREE_CODE (exp) == INTEGER_CST)
28898 {
28899 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
28900 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
28901 return MAX (mode_align, align);
28902 }
28903 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
28904 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
28905 return BITS_PER_WORD;
28906
28907 return align;
28908 }
28909
28910 /* Implement TARGET_EMPTY_RECORD_P. */
28911
28912 static bool
28913 ix86_is_empty_record (const_tree type)
28914 {
28915 if (!TARGET_64BIT)
28916 return false;
28917 return default_is_empty_record (type);
28918 }
28919
28920 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
28921
28922 static void
28923 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
28924 {
28925 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
28926
28927 if (!cum->warn_empty)
28928 return;
28929
28930 if (!TYPE_EMPTY_P (type))
28931 return;
28932
28933 const_tree ctx = get_ultimate_context (cum->decl);
28934 if (ctx != NULL_TREE
28935 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
28936 return;
28937
28938 /* If the actual size of the type is zero, then there is no change
28939 in how objects of this size are passed. */
28940 if (int_size_in_bytes (type) == 0)
28941 return;
28942
28943 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
28944 "changes in -fabi-version=12 (GCC 8)", type);
28945
28946 /* Only warn once. */
28947 cum->warn_empty = false;
28948 }
28949
28950 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
28951 the data type, and ALIGN is the alignment that the object would
28952 ordinarily have. */
28953
28954 static int
28955 iamcu_alignment (tree type, int align)
28956 {
28957 machine_mode mode;
28958
28959 if (align < 32 || TYPE_USER_ALIGN (type))
28960 return align;
28961
28962 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
28963 bytes. */
28964 mode = TYPE_MODE (strip_array_types (type));
28965 switch (GET_MODE_CLASS (mode))
28966 {
28967 case MODE_INT:
28968 case MODE_COMPLEX_INT:
28969 case MODE_COMPLEX_FLOAT:
28970 case MODE_FLOAT:
28971 case MODE_DECIMAL_FLOAT:
28972 return 32;
28973 default:
28974 return align;
28975 }
28976 }
28977
28978 /* Compute the alignment for a static variable.
28979 TYPE is the data type, and ALIGN is the alignment that
28980 the object would ordinarily have. The value of this function is used
28981 instead of that alignment to align the object. */
28982
28983 int
28984 ix86_data_alignment (tree type, int align, bool opt)
28985 {
28986 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
28987 for symbols from other compilation units or symbols that don't need
28988 to bind locally. In order to preserve some ABI compatibility with
28989 those compilers, ensure we don't decrease alignment from what we
28990 used to assume. */
28991
28992 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
28993
28994 /* A data structure equal to or greater than the size of a cache line
28995 (64 bytes in the Pentium 4 and other recent Intel processors, including
28996 processors based on Intel Core microarchitecture) should be aligned
28997 so that its base address is a multiple of the cache line size. */
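/* For example, with a 64-byte prefetch block the computation below yields
   a max_align of 64 * 8 = 512 bits (unless MAX_OFILE_ALIGNMENT is smaller),
   which is the alignment applied to sufficiently large aggregates later on. */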
28998
28999 int max_align
29000 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29001
29002 if (max_align < BITS_PER_WORD)
29003 max_align = BITS_PER_WORD;
29004
29005 switch (ix86_align_data_type)
29006 {
29007 case ix86_align_data_type_abi: opt = false; break;
29008 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29009 case ix86_align_data_type_cacheline: break;
29010 }
29011
29012 if (TARGET_IAMCU)
29013 align = iamcu_alignment (type, align);
29014
29015 if (opt
29016 && AGGREGATE_TYPE_P (type)
29017 && TYPE_SIZE (type)
29018 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
29019 {
29020 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
29021 && align < max_align_compat)
29022 align = max_align_compat;
29023 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
29024 && align < max_align)
29025 align = max_align;
29026 }
29027
29028 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
29029 to a 16-byte boundary. */
29030 if (TARGET_64BIT)
29031 {
29032 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
29033 && TYPE_SIZE (type)
29034 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29035 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
29036 && align < 128)
29037 return 128;
29038 }
29039
29040 if (!opt)
29041 return align;
29042
29043 if (TREE_CODE (type) == ARRAY_TYPE)
29044 {
29045 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29046 return 64;
29047 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29048 return 128;
29049 }
29050 else if (TREE_CODE (type) == COMPLEX_TYPE)
29051 {
29052
29053 if (TYPE_MODE (type) == DCmode && align < 64)
29054 return 64;
29055 if ((TYPE_MODE (type) == XCmode
29056 || TYPE_MODE (type) == TCmode) && align < 128)
29057 return 128;
29058 }
29059 else if ((TREE_CODE (type) == RECORD_TYPE
29060 || TREE_CODE (type) == UNION_TYPE
29061 || TREE_CODE (type) == QUAL_UNION_TYPE)
29062 && TYPE_FIELDS (type))
29063 {
29064 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29065 return 64;
29066 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29067 return 128;
29068 }
29069 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29070 || TREE_CODE (type) == INTEGER_TYPE)
29071 {
29072 if (TYPE_MODE (type) == DFmode && align < 64)
29073 return 64;
29074 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29075 return 128;
29076 }
29077
29078 return align;
29079 }
29080
29081 /* Compute the alignment for a local variable or a stack slot. EXP is
29082 the data type or decl itself, MODE is the widest mode available and
29083 ALIGN is the alignment that the object would ordinarily have. The
29084 value of this macro is used instead of that alignment to align the
29085 object. */
29086
29087 unsigned int
29088 ix86_local_alignment (tree exp, machine_mode mode,
29089 unsigned int align)
29090 {
29091 tree type, decl;
29092
29093 if (exp && DECL_P (exp))
29094 {
29095 type = TREE_TYPE (exp);
29096 decl = exp;
29097 }
29098 else
29099 {
29100 type = exp;
29101 decl = NULL;
29102 }
29103
29104 /* Don't do dynamic stack realignment for long long objects with
29105 -mpreferred-stack-boundary=2. */
29106 if (!TARGET_64BIT
29107 && align == 64
29108 && ix86_preferred_stack_boundary < 64
29109 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
29110 && (!type || !TYPE_USER_ALIGN (type))
29111 && (!decl || !DECL_USER_ALIGN (decl)))
29112 align = 32;
29113
29114 /* If TYPE is NULL, we are allocating a stack slot for caller-save
29115 register in MODE. We will return the largest alignment of XF
29116 and DF. */
29117 if (!type)
29118 {
29119 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
29120 align = GET_MODE_ALIGNMENT (DFmode);
29121 return align;
29122 }
29123
29124 /* Don't increase alignment for Intel MCU psABI. */
29125 if (TARGET_IAMCU)
29126 return align;
29127
29128 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
29129 to a 16-byte boundary. The exact wording is:
29130
29131 An array uses the same alignment as its elements, except that a local or
29132 global array variable of length at least 16 bytes or
29133 a C99 variable-length array variable always has alignment of at least 16 bytes.
29134
29135 This was added to allow use of aligned SSE instructions on arrays. This
29136 rule is meant for static storage (where the compiler cannot do the analysis
29137 by itself). We follow it for automatic variables only when convenient.
29138 We fully control everything in the function being compiled, and functions
29139 from other units cannot rely on the alignment.
29140
29141 Exclude the va_list type. It is the common case of a local array where
29142 we cannot benefit from the alignment.
29143
29144 TODO: Probably one should optimize for size only when var is not escaping. */
29145 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
29146 && TARGET_SSE)
29147 {
29148 if (AGGREGATE_TYPE_P (type)
29149 && (va_list_type_node == NULL_TREE
29150 || (TYPE_MAIN_VARIANT (type)
29151 != TYPE_MAIN_VARIANT (va_list_type_node)))
29152 && TYPE_SIZE (type)
29153 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29154 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
29155 && align < 128)
29156 return 128;
29157 }
29158 if (TREE_CODE (type) == ARRAY_TYPE)
29159 {
29160 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29161 return 64;
29162 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29163 return 128;
29164 }
29165 else if (TREE_CODE (type) == COMPLEX_TYPE)
29166 {
29167 if (TYPE_MODE (type) == DCmode && align < 64)
29168 return 64;
29169 if ((TYPE_MODE (type) == XCmode
29170 || TYPE_MODE (type) == TCmode) && align < 128)
29171 return 128;
29172 }
29173 else if ((TREE_CODE (type) == RECORD_TYPE
29174 || TREE_CODE (type) == UNION_TYPE
29175 || TREE_CODE (type) == QUAL_UNION_TYPE)
29176 && TYPE_FIELDS (type))
29177 {
29178 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29179 return 64;
29180 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29181 return 128;
29182 }
29183 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29184 || TREE_CODE (type) == INTEGER_TYPE)
29185 {
29186
29187 if (TYPE_MODE (type) == DFmode && align < 64)
29188 return 64;
29189 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29190 return 128;
29191 }
29192 return align;
29193 }
29194
29195 /* Compute the minimum required alignment for dynamic stack realignment
29196 purposes for a local variable, parameter or a stack slot. EXP is
29197 the data type or decl itself, MODE is its mode and ALIGN is the
29198 alignment that the object would ordinarily have. */
29199
29200 unsigned int
29201 ix86_minimum_alignment (tree exp, machine_mode mode,
29202 unsigned int align)
29203 {
29204 tree type, decl;
29205
29206 if (exp && DECL_P (exp))
29207 {
29208 type = TREE_TYPE (exp);
29209 decl = exp;
29210 }
29211 else
29212 {
29213 type = exp;
29214 decl = NULL;
29215 }
29216
29217 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29218 return align;
29219
29220 /* Don't do dynamic stack realignment for long long objects with
29221 -mpreferred-stack-boundary=2. */
29222 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29223 && (!type || !TYPE_USER_ALIGN (type))
29224 && (!decl || !DECL_USER_ALIGN (decl)))
29225 {
29226 gcc_checking_assert (!TARGET_STV);
29227 return 32;
29228 }
29229
29230 return align;
29231 }
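/* Illustrative example (a sketch): with -m32 -mpreferred-stack-boundary=2 a
   "long long" local would normally require 64-bit alignment and therefore
   dynamic realignment of the whole frame; the DImode case above instead
   reports a 32-bit minimum so that the 4-byte preferred boundary suffices.  */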
29232 \f
29233 /* Find a location for the static chain incoming to a nested function.
29234 This is a register, unless all free registers are used by arguments. */
29235
29236 static rtx
29237 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29238 {
29239 unsigned regno;
29240
29241 if (TARGET_64BIT)
29242 {
29243 /* We always use R10 in 64-bit mode. */
29244 regno = R10_REG;
29245 }
29246 else
29247 {
29248 const_tree fntype, fndecl;
29249 unsigned int ccvt;
29250
29251 /* By default in 32-bit mode we use ECX to pass the static chain. */
29252 regno = CX_REG;
29253
29254 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29255 {
29256 fntype = TREE_TYPE (fndecl_or_type);
29257 fndecl = fndecl_or_type;
29258 }
29259 else
29260 {
29261 fntype = fndecl_or_type;
29262 fndecl = NULL;
29263 }
29264
29265 ccvt = ix86_get_callcvt (fntype);
29266 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29267 {
29268 /* Fastcall functions use ecx/edx for arguments, which leaves
29269 us with EAX for the static chain.
29270 Thiscall functions use ecx for arguments, which also
29271 leaves us with EAX for the static chain. */
29272 regno = AX_REG;
29273 }
29274 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29275 {
29276 /* Thiscall functions use ecx for arguments, which leaves
29277 us with EAX and EDX for the static chain.
29278 For ABI compatibility we use EAX. */
29279 regno = AX_REG;
29280 }
29281 else if (ix86_function_regparm (fntype, fndecl) == 3)
29282 {
29283 /* For regparm 3, we have no free call-clobbered registers in
29284 which to store the static chain. In order to implement this,
29285 we have the trampoline push the static chain to the stack.
29286 However, we can't push a value below the return address when
29287 we call the nested function directly, so we have to use an
29288 alternate entry point. For this we use ESI, and have the
29289 alternate entry point push ESI, so that things appear the
29290 same once we're executing the nested function. */
29291 if (incoming_p)
29292 {
29293 if (fndecl == current_function_decl
29294 && !ix86_static_chain_on_stack)
29295 {
29296 gcc_assert (!reload_completed);
29297 ix86_static_chain_on_stack = true;
29298 }
29299 return gen_frame_mem (SImode,
29300 plus_constant (Pmode,
29301 arg_pointer_rtx, -8));
29302 }
29303 regno = SI_REG;
29304 }
29305 }
29306
29307 return gen_rtx_REG (Pmode, regno);
29308 }
29309
29310 /* Emit RTL insns to initialize the variable parts of a trampoline.
29311 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29312 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29313 to be passed to the target function. */
29314
29315 static void
29316 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29317 {
29318 rtx mem, fnaddr;
29319 int opcode;
29320 int offset = 0;
29321
29322 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29323
29324 if (TARGET_64BIT)
29325 {
29326 int size;
29327
29328 /* Load the function address into r11. Try to load the address using
29329 the shorter movl instead of movabs. We may want to support
29330 movq for kernel mode, but the kernel does not use trampolines at
29331 the moment. FNADDR is a 32-bit address and may not be in
29332 DImode when ptr_mode == SImode; always use movl in that
29333 case. */
29334 if (ptr_mode == SImode
29335 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29336 {
29337 fnaddr = copy_addr_to_reg (fnaddr);
29338
29339 mem = adjust_address (m_tramp, HImode, offset);
29340 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29341
29342 mem = adjust_address (m_tramp, SImode, offset + 2);
29343 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
29344 offset += 6;
29345 }
29346 else
29347 {
29348 mem = adjust_address (m_tramp, HImode, offset);
29349 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29350
29351 mem = adjust_address (m_tramp, DImode, offset + 2);
29352 emit_move_insn (mem, fnaddr);
29353 offset += 10;
29354 }
29355
29356 /* Load static chain using movabs to r10. Use the shorter movl
29357 instead of movabs when ptr_mode == SImode. */
29358 if (ptr_mode == SImode)
29359 {
29360 opcode = 0xba41;
29361 size = 6;
29362 }
29363 else
29364 {
29365 opcode = 0xba49;
29366 size = 10;
29367 }
29368
29369 mem = adjust_address (m_tramp, HImode, offset);
29370 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29371
29372 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29373 emit_move_insn (mem, chain_value);
29374 offset += size;
29375
29376 /* Jump to r11; the last (unused) byte is a nop, only there to
29377 pad the write out to a single 32-bit store. */
29378 mem = adjust_address (m_tramp, SImode, offset);
29379 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
29380 offset += 4;
29381 }
29382 else
29383 {
29384 rtx disp, chain;
29385
29386 /* Depending on the static chain location, either load a register
29387 with a constant, or push the constant to the stack. All of the
29388 instructions are the same size. */
29389 chain = ix86_static_chain (fndecl, true);
29390 if (REG_P (chain))
29391 {
29392 switch (REGNO (chain))
29393 {
29394 case AX_REG:
29395 opcode = 0xb8; break;
29396 case CX_REG:
29397 opcode = 0xb9; break;
29398 default:
29399 gcc_unreachable ();
29400 }
29401 }
29402 else
29403 opcode = 0x68;
29404
29405 mem = adjust_address (m_tramp, QImode, offset);
29406 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29407
29408 mem = adjust_address (m_tramp, SImode, offset + 1);
29409 emit_move_insn (mem, chain_value);
29410 offset += 5;
29411
29412 mem = adjust_address (m_tramp, QImode, offset);
29413 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29414
29415 mem = adjust_address (m_tramp, SImode, offset + 1);
29416
29417 /* Compute offset from the end of the jmp to the target function.
29418 In the case in which the trampoline stores the static chain on
29419 the stack, we need to skip the first insn which pushes the
29420 (call-saved) register static chain; this push is 1 byte. */
29421 offset += 5;
29422 disp = expand_binop (SImode, sub_optab, fnaddr,
29423 plus_constant (Pmode, XEXP (m_tramp, 0),
29424 offset - (MEM_P (chain) ? 1 : 0)),
29425 NULL_RTX, 1, OPTAB_DIRECT);
29426 emit_move_insn (mem, disp);
29427 }
29428
29429 gcc_assert (offset <= TRAMPOLINE_SIZE);
29430
29431 #ifdef HAVE_ENABLE_EXECUTE_STACK
29432 #ifdef CHECK_EXECUTE_STACK_ENABLED
29433 if (CHECK_EXECUTE_STACK_ENABLED)
29434 #endif
29435 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29436 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
29437 #endif
29438 }
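/* For reference, a sketch of the bytes the 64-bit movabs path above writes
   into the trampoline (assuming neither shorter movl form applies):

       49 bb <8-byte target address>     movabs $fnaddr, %r11
       49 ba <8-byte static chain>       movabs $chain,  %r10
       49 ff e3                          jmp    *%r11
       90                                nop, padding the final 32-bit store

   24 bytes in total, matching the opcodes 0xbb49, 0xba49 and 0x90e3ff49
   emitted above in little-endian byte order.  */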
29439
29440 static bool
29441 ix86_allocate_stack_slots_for_args (void)
29442 {
29443 /* Naked functions should not allocate stack slots for arguments. */
29444 return !ix86_function_naked (current_function_decl);
29445 }
29446
29447 static bool
29448 ix86_warn_func_return (tree decl)
29449 {
29450 /* Naked functions are implemented entirely in assembly, including the
29451 return sequence, so suppress warnings about this. */
29452 return !ix86_function_naked (decl);
29453 }
29454 \f
29455 /* The following file contains several enumerations and data structures
29456 built from the definitions in i386-builtin-types.def. */
29457
29458 #include "i386-builtin-types.inc"
29459
29460 /* Table for the ix86 builtin non-function types. */
29461 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29462
29463 /* Retrieve an element from the above table, building some of
29464 the types lazily. */
29465
29466 static tree
29467 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29468 {
29469 unsigned int index;
29470 tree type, itype;
29471
29472 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
29473
29474 type = ix86_builtin_type_tab[(int) tcode];
29475 if (type != NULL)
29476 return type;
29477
29478 gcc_assert (tcode > IX86_BT_LAST_PRIM);
29479 if (tcode <= IX86_BT_LAST_VECT)
29480 {
29481 machine_mode mode;
29482
29483 index = tcode - IX86_BT_LAST_PRIM - 1;
29484 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29485 mode = ix86_builtin_type_vect_mode[index];
29486
29487 type = build_vector_type_for_mode (itype, mode);
29488 }
29489 else
29490 {
29491 int quals;
29492
29493 index = tcode - IX86_BT_LAST_VECT - 1;
29494 if (tcode <= IX86_BT_LAST_PTR)
29495 quals = TYPE_UNQUALIFIED;
29496 else
29497 quals = TYPE_QUAL_CONST;
29498
29499 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29500 if (quals != TYPE_UNQUALIFIED)
29501 itype = build_qualified_type (itype, quals);
29502
29503 type = build_pointer_type (itype);
29504 }
29505
29506 ix86_builtin_type_tab[(int) tcode] = type;
29507 return type;
29508 }
29509
29510 /* Table for the ix86 builtin function types. */
29511 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29512
29513 /* Retrieve an element from the above table, building some of
29514 the types lazily. */
29515
29516 static tree
29517 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29518 {
29519 tree type;
29520
29521 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
29522
29523 type = ix86_builtin_func_type_tab[(int) tcode];
29524 if (type != NULL)
29525 return type;
29526
29527 if (tcode <= IX86_BT_LAST_FUNC)
29528 {
29529 unsigned start = ix86_builtin_func_start[(int) tcode];
29530 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29531 tree rtype, atype, args = void_list_node;
29532 unsigned i;
29533
29534 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
29535 for (i = after - 1; i > start; --i)
29536 {
29537 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29538 args = tree_cons (NULL, atype, args);
29539 }
29540
29541 type = build_function_type (rtype, args);
29542 }
29543 else
29544 {
29545 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29546 enum ix86_builtin_func_type icode;
29547
29548 icode = ix86_builtin_func_alias_base[index];
29549 type = ix86_get_builtin_func_type (icode);
29550 }
29551
29552 ix86_builtin_func_type_tab[(int) tcode] = type;
29553 return type;
29554 }
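/* Illustrative note on the construction above: for an entry such as
   V4SF_FTYPE_V4SF_V4SF, ix86_builtin_func_args holds (roughly) the return
   type followed by the argument types, { V4SF, V4SF, V4SF }.  The loop walks
   the argument slots backwards, and because each tree_cons prepends, the
   resulting TREE_LIST ends up in source order, terminated by
   void_list_node.  */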
29555
29556
29557 /* Codes for all the SSE/MMX builtins. Builtins not mentioned in any
29558 bdesc_* arrays below should come first, then builtins for each bdesc_*
29559 array in ascending order, so that we can use direct array accesses. */
29560 enum ix86_builtins
29561 {
29562 IX86_BUILTIN_MASKMOVQ,
29563 IX86_BUILTIN_LDMXCSR,
29564 IX86_BUILTIN_STMXCSR,
29565 IX86_BUILTIN_MASKMOVDQU,
29566 IX86_BUILTIN_PSLLDQ128,
29567 IX86_BUILTIN_CLFLUSH,
29568 IX86_BUILTIN_MONITOR,
29569 IX86_BUILTIN_MWAIT,
29570 IX86_BUILTIN_CLZERO,
29571 IX86_BUILTIN_VEC_INIT_V2SI,
29572 IX86_BUILTIN_VEC_INIT_V4HI,
29573 IX86_BUILTIN_VEC_INIT_V8QI,
29574 IX86_BUILTIN_VEC_EXT_V2DF,
29575 IX86_BUILTIN_VEC_EXT_V2DI,
29576 IX86_BUILTIN_VEC_EXT_V4SF,
29577 IX86_BUILTIN_VEC_EXT_V4SI,
29578 IX86_BUILTIN_VEC_EXT_V8HI,
29579 IX86_BUILTIN_VEC_EXT_V2SI,
29580 IX86_BUILTIN_VEC_EXT_V4HI,
29581 IX86_BUILTIN_VEC_EXT_V16QI,
29582 IX86_BUILTIN_VEC_SET_V2DI,
29583 IX86_BUILTIN_VEC_SET_V4SF,
29584 IX86_BUILTIN_VEC_SET_V4SI,
29585 IX86_BUILTIN_VEC_SET_V8HI,
29586 IX86_BUILTIN_VEC_SET_V4HI,
29587 IX86_BUILTIN_VEC_SET_V16QI,
29588 IX86_BUILTIN_GATHERSIV2DF,
29589 IX86_BUILTIN_GATHERSIV4DF,
29590 IX86_BUILTIN_GATHERDIV2DF,
29591 IX86_BUILTIN_GATHERDIV4DF,
29592 IX86_BUILTIN_GATHERSIV4SF,
29593 IX86_BUILTIN_GATHERSIV8SF,
29594 IX86_BUILTIN_GATHERDIV4SF,
29595 IX86_BUILTIN_GATHERDIV8SF,
29596 IX86_BUILTIN_GATHERSIV2DI,
29597 IX86_BUILTIN_GATHERSIV4DI,
29598 IX86_BUILTIN_GATHERDIV2DI,
29599 IX86_BUILTIN_GATHERDIV4DI,
29600 IX86_BUILTIN_GATHERSIV4SI,
29601 IX86_BUILTIN_GATHERSIV8SI,
29602 IX86_BUILTIN_GATHERDIV4SI,
29603 IX86_BUILTIN_GATHERDIV8SI,
29604 IX86_BUILTIN_VFMSUBSD3_MASK3,
29605 IX86_BUILTIN_VFMSUBSS3_MASK3,
29606 IX86_BUILTIN_GATHER3SIV8SF,
29607 IX86_BUILTIN_GATHER3SIV4SF,
29608 IX86_BUILTIN_GATHER3SIV4DF,
29609 IX86_BUILTIN_GATHER3SIV2DF,
29610 IX86_BUILTIN_GATHER3DIV8SF,
29611 IX86_BUILTIN_GATHER3DIV4SF,
29612 IX86_BUILTIN_GATHER3DIV4DF,
29613 IX86_BUILTIN_GATHER3DIV2DF,
29614 IX86_BUILTIN_GATHER3SIV8SI,
29615 IX86_BUILTIN_GATHER3SIV4SI,
29616 IX86_BUILTIN_GATHER3SIV4DI,
29617 IX86_BUILTIN_GATHER3SIV2DI,
29618 IX86_BUILTIN_GATHER3DIV8SI,
29619 IX86_BUILTIN_GATHER3DIV4SI,
29620 IX86_BUILTIN_GATHER3DIV4DI,
29621 IX86_BUILTIN_GATHER3DIV2DI,
29622 IX86_BUILTIN_SCATTERSIV8SF,
29623 IX86_BUILTIN_SCATTERSIV4SF,
29624 IX86_BUILTIN_SCATTERSIV4DF,
29625 IX86_BUILTIN_SCATTERSIV2DF,
29626 IX86_BUILTIN_SCATTERDIV8SF,
29627 IX86_BUILTIN_SCATTERDIV4SF,
29628 IX86_BUILTIN_SCATTERDIV4DF,
29629 IX86_BUILTIN_SCATTERDIV2DF,
29630 IX86_BUILTIN_SCATTERSIV8SI,
29631 IX86_BUILTIN_SCATTERSIV4SI,
29632 IX86_BUILTIN_SCATTERSIV4DI,
29633 IX86_BUILTIN_SCATTERSIV2DI,
29634 IX86_BUILTIN_SCATTERDIV8SI,
29635 IX86_BUILTIN_SCATTERDIV4SI,
29636 IX86_BUILTIN_SCATTERDIV4DI,
29637 IX86_BUILTIN_SCATTERDIV2DI,
29638 /* Alternate 4 and 8 element gather/scatter for the vectorizer
29639 where all operands are 32-byte or 64-byte wide respectively. */
29640 IX86_BUILTIN_GATHERALTSIV4DF,
29641 IX86_BUILTIN_GATHERALTDIV8SF,
29642 IX86_BUILTIN_GATHERALTSIV4DI,
29643 IX86_BUILTIN_GATHERALTDIV8SI,
29644 IX86_BUILTIN_GATHER3ALTDIV16SF,
29645 IX86_BUILTIN_GATHER3ALTDIV16SI,
29646 IX86_BUILTIN_GATHER3ALTSIV4DF,
29647 IX86_BUILTIN_GATHER3ALTDIV8SF,
29648 IX86_BUILTIN_GATHER3ALTSIV4DI,
29649 IX86_BUILTIN_GATHER3ALTDIV8SI,
29650 IX86_BUILTIN_GATHER3ALTSIV8DF,
29651 IX86_BUILTIN_GATHER3ALTSIV8DI,
29652 IX86_BUILTIN_GATHER3DIV16SF,
29653 IX86_BUILTIN_GATHER3DIV16SI,
29654 IX86_BUILTIN_GATHER3DIV8DF,
29655 IX86_BUILTIN_GATHER3DIV8DI,
29656 IX86_BUILTIN_GATHER3SIV16SF,
29657 IX86_BUILTIN_GATHER3SIV16SI,
29658 IX86_BUILTIN_GATHER3SIV8DF,
29659 IX86_BUILTIN_GATHER3SIV8DI,
29660 IX86_BUILTIN_SCATTERALTSIV8DF,
29661 IX86_BUILTIN_SCATTERALTDIV16SF,
29662 IX86_BUILTIN_SCATTERALTSIV8DI,
29663 IX86_BUILTIN_SCATTERALTDIV16SI,
29664 IX86_BUILTIN_SCATTERDIV16SF,
29665 IX86_BUILTIN_SCATTERDIV16SI,
29666 IX86_BUILTIN_SCATTERDIV8DF,
29667 IX86_BUILTIN_SCATTERDIV8DI,
29668 IX86_BUILTIN_SCATTERSIV16SF,
29669 IX86_BUILTIN_SCATTERSIV16SI,
29670 IX86_BUILTIN_SCATTERSIV8DF,
29671 IX86_BUILTIN_SCATTERSIV8DI,
29672 IX86_BUILTIN_GATHERPFQPD,
29673 IX86_BUILTIN_GATHERPFDPS,
29674 IX86_BUILTIN_GATHERPFDPD,
29675 IX86_BUILTIN_GATHERPFQPS,
29676 IX86_BUILTIN_SCATTERPFDPD,
29677 IX86_BUILTIN_SCATTERPFDPS,
29678 IX86_BUILTIN_SCATTERPFQPD,
29679 IX86_BUILTIN_SCATTERPFQPS,
29680 IX86_BUILTIN_CLWB,
29681 IX86_BUILTIN_CLFLUSHOPT,
29682 IX86_BUILTIN_INFQ,
29683 IX86_BUILTIN_HUGE_VALQ,
29684 IX86_BUILTIN_NANQ,
29685 IX86_BUILTIN_NANSQ,
29686 IX86_BUILTIN_XABORT,
29687 IX86_BUILTIN_ADDCARRYX32,
29688 IX86_BUILTIN_ADDCARRYX64,
29689 IX86_BUILTIN_SBB32,
29690 IX86_BUILTIN_SBB64,
29691 IX86_BUILTIN_RDRAND16_STEP,
29692 IX86_BUILTIN_RDRAND32_STEP,
29693 IX86_BUILTIN_RDRAND64_STEP,
29694 IX86_BUILTIN_RDSEED16_STEP,
29695 IX86_BUILTIN_RDSEED32_STEP,
29696 IX86_BUILTIN_RDSEED64_STEP,
29697 IX86_BUILTIN_MONITORX,
29698 IX86_BUILTIN_MWAITX,
29699 IX86_BUILTIN_CFSTRING,
29700 IX86_BUILTIN_CPU_INIT,
29701 IX86_BUILTIN_CPU_IS,
29702 IX86_BUILTIN_CPU_SUPPORTS,
29703 IX86_BUILTIN_READ_FLAGS,
29704 IX86_BUILTIN_WRITE_FLAGS,
29705
29706 /* All the remaining builtins are tracked in bdesc_* arrays in
29707 i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after
29708 this point. */
29709 #define BDESC(mask, icode, name, code, comparison, flag) \
29710 code,
29711 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
29712 code, \
29713 IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
29714 #define BDESC_END(kind, next_kind)
29715
29716 #include "i386-builtin.def"
29717
29718 #undef BDESC
29719 #undef BDESC_FIRST
29720 #undef BDESC_END
29721
29722 IX86_BUILTIN_MAX,
29723
29724 IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
29725
29726 /* Now just the aliases for bdesc_* start/end. */
29727 #define BDESC(mask, icode, name, code, comparison, flag)
29728 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
29729 #define BDESC_END(kind, next_kind) \
29730 IX86_BUILTIN__BDESC_##kind##_LAST \
29731 = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
29732
29733 #include "i386-builtin.def"
29734
29735 #undef BDESC
29736 #undef BDESC_FIRST
29737 #undef BDESC_END
29738
29739 /* Just to make sure there is no comma after the last enumerator. */
29740 IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
29741 };
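/* To illustrate the expansion above with a hypothetical i386-builtin.def
   entry: a line such as

       BDESC_FIRST (comi, COMI, MASK, ICODE, "__builtin_ia32_comieq",
                    IX86_BUILTIN_COMIEQSS, EQ, 0)

   contributes, on the first inclusion,

       IX86_BUILTIN_COMIEQSS,
       IX86_BUILTIN__BDESC_COMI_FIRST = IX86_BUILTIN_COMIEQSS,

   while the second inclusion only defines the matching
   IX86_BUILTIN__BDESC_COMI_LAST alias as the FIRST of the following kind
   minus one.  */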
29742
29743 /* Table for the ix86 builtin decls. */
29744 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
29745
29746 /* Table of all of the builtin functions that are possible with different ISAs
29747 but are waiting to be built until a function is declared to use that
29748 ISA. */
29749 struct builtin_isa {
29750 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
29751 HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */
29752 const char *name; /* function name */
29753 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
29754 unsigned char const_p:1; /* true if the declaration is constant */
29755 unsigned char pure_p:1; /* true if the declaration has pure attribute */
29756 bool leaf_p; /* true if the declaration has leaf attribute */
29757 bool nothrow_p; /* true if the declaration has nothrow attribute */
29758 bool set_and_not_built_p; /* true if recorded here but the decl is not built yet */
29759 };
29760
29761 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
29762
29763 /* ISA bits that may still cause a deferred builtin to be added. */
29764 static HOST_WIDE_INT deferred_isa_values = 0;
29765 static HOST_WIDE_INT deferred_isa_values2 = 0;
29766
29767 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
29768 of isa_flags to use in the ix86_builtins_isa array. Store the
29769 function decl in the ix86_builtins array. Return the function decl, or
29770 NULL_TREE if the builtin was not added.
29771
29772 If the front end has a special hook for builtin functions, delay adding
29773 builtin functions that aren't in the current ISA until the ISA is changed
29774 with function specific optimization. Doing so can save about 300K for the
29775 default compiler. When the builtin is expanded, check at that time whether
29776 it is valid.
29777
29778 If the front end doesn't have a special hook, record all builtins, even
29779 those for instruction sets not in the current ISA, in case the user uses
29780 function specific options for a different ISA, so that we don't get scope
29781 errors if a builtin is added in the middle of a function scope. */
29782
29783 static inline tree
29784 def_builtin (HOST_WIDE_INT mask, const char *name,
29785 enum ix86_builtin_func_type tcode,
29786 enum ix86_builtins code)
29787 {
29788 tree decl = NULL_TREE;
29789
29790 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29791 {
29792 ix86_builtins_isa[(int) code].isa = mask;
29793
29794 /* OPTION_MASK_ISA_AVX512VL has a special meaning. Unlike the generic case,
29795 where any set bit means that the built-in is enabled, this bit must be
29796 *and-ed* with another one. E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
29797 means that *both* cpuid bits must be set for the built-in to be available.
29798 Handle this here. */
29799 if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
29800 mask &= ~OPTION_MASK_ISA_AVX512VL;
29801
29802 mask &= ~OPTION_MASK_ISA_64BIT;
29803 if (mask == 0
29804 || (mask & ix86_isa_flags) != 0
29805 || (lang_hooks.builtin_function
29806 == lang_hooks.builtin_function_ext_scope))
29807
29808 {
29809 tree type = ix86_get_builtin_func_type (tcode);
29810 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29811 NULL, NULL_TREE);
29812 ix86_builtins[(int) code] = decl;
29813 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29814 }
29815 else
29816 {
29817 /* Only a MASK whose set_and_not_built_p is true can still cause
29818 a builtin to be added later, so remember it. */
29819 deferred_isa_values |= mask;
29820 ix86_builtins[(int) code] = NULL_TREE;
29821 ix86_builtins_isa[(int) code].tcode = tcode;
29822 ix86_builtins_isa[(int) code].name = name;
29823 ix86_builtins_isa[(int) code].leaf_p = false;
29824 ix86_builtins_isa[(int) code].nothrow_p = false;
29825 ix86_builtins_isa[(int) code].const_p = false;
29826 ix86_builtins_isa[(int) code].pure_p = false;
29827 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29828 }
29829 }
29830
29831 return decl;
29832 }
29833
29834 /* Like def_builtin, but also marks the function decl "const". */
29835
29836 static inline tree
29837 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29838 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29839 {
29840 tree decl = def_builtin (mask, name, tcode, code);
29841 if (decl)
29842 TREE_READONLY (decl) = 1;
29843 else
29844 ix86_builtins_isa[(int) code].const_p = true;
29845
29846 return decl;
29847 }
29848
29849 /* Like def_builtin, but also marks the function decl "pure". */
29850
29851 static inline tree
29852 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
29853 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29854 {
29855 tree decl = def_builtin (mask, name, tcode, code);
29856 if (decl)
29857 DECL_PURE_P (decl) = 1;
29858 else
29859 ix86_builtins_isa[(int) code].pure_p = true;
29860
29861 return decl;
29862 }
29863
29864 /* Like def_builtin, but for additional isa2 flags. */
29865
29866 static inline tree
29867 def_builtin2 (HOST_WIDE_INT mask, const char *name,
29868 enum ix86_builtin_func_type tcode,
29869 enum ix86_builtins code)
29870 {
29871 tree decl = NULL_TREE;
29872
29873 ix86_builtins_isa[(int) code].isa2 = mask;
29874
29875 if (mask == 0
29876 || (mask & ix86_isa_flags2) != 0
29877 || (lang_hooks.builtin_function
29878 == lang_hooks.builtin_function_ext_scope))
29879
29880 {
29881 tree type = ix86_get_builtin_func_type (tcode);
29882 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29883 NULL, NULL_TREE);
29884 ix86_builtins[(int) code] = decl;
29885 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29886 }
29887 else
29888 {
29889 /* Only a MASK whose set_and_not_built_p is true can still cause
29890 a builtin to be added later, so remember it. */
29891 deferred_isa_values2 |= mask;
29892 ix86_builtins[(int) code] = NULL_TREE;
29893 ix86_builtins_isa[(int) code].tcode = tcode;
29894 ix86_builtins_isa[(int) code].name = name;
29895 ix86_builtins_isa[(int) code].leaf_p = false;
29896 ix86_builtins_isa[(int) code].nothrow_p = false;
29897 ix86_builtins_isa[(int) code].const_p = false;
29898 ix86_builtins_isa[(int) code].pure_p = false;
29899 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29900 }
29901
29902 return decl;
29903 }
29904
29905 /* Like def_builtin, but also marks the function decl "const". */
29906
29907 static inline tree
29908 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
29909 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29910 {
29911 tree decl = def_builtin2 (mask, name, tcode, code);
29912 if (decl)
29913 TREE_READONLY (decl) = 1;
29914 else
29915 ix86_builtins_isa[(int) code].const_p = true;
29916
29917 return decl;
29918 }
29919
29920 /* Like def_builtin, but also marks the function decl "pure". */
29921
29922 static inline tree
29923 def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
29924 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29925 {
29926 tree decl = def_builtin2 (mask, name, tcode, code);
29927 if (decl)
29928 DECL_PURE_P (decl) = 1;
29929 else
29930 ix86_builtins_isa[(int) code].pure_p = true;
29931
29932 return decl;
29933 }
29934
29935 /* Add any new builtin functions for a given ISA that may not have been
29936 declared yet. This saves a bit of space compared to adding all of the
29937 declarations to the tree even when they are never used. */
29938
29939 static void
29940 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
29941 {
29942 if ((isa & deferred_isa_values) == 0
29943 && (isa2 & deferred_isa_values2) == 0)
29944 return;
29945
29946 /* The ISA bits handled now can be removed from the deferred sets. */
29947 deferred_isa_values &= ~isa;
29948 deferred_isa_values2 &= ~isa2;
29949
29950 int i;
29951 tree saved_current_target_pragma = current_target_pragma;
29952 current_target_pragma = NULL_TREE;
29953
29954 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29955 {
29956 if (((ix86_builtins_isa[i].isa & isa) != 0
29957 || (ix86_builtins_isa[i].isa2 & isa2) != 0)
29958 && ix86_builtins_isa[i].set_and_not_built_p)
29959 {
29960 tree decl, type;
29961
29962 /* Don't define the builtin again. */
29963 ix86_builtins_isa[i].set_and_not_built_p = false;
29964
29965 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29966 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29967 type, i, BUILT_IN_MD, NULL,
29968 NULL_TREE);
29969
29970 ix86_builtins[i] = decl;
29971 if (ix86_builtins_isa[i].const_p)
29972 TREE_READONLY (decl) = 1;
29973 if (ix86_builtins_isa[i].pure_p)
29974 DECL_PURE_P (decl) = 1;
29975 if (ix86_builtins_isa[i].leaf_p)
29976 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
29977 NULL_TREE);
29978 if (ix86_builtins_isa[i].nothrow_p)
29979 TREE_NOTHROW (decl) = 1;
29980 }
29981 }
29982
29983 current_target_pragma = saved_current_target_pragma;
29984 }
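/* Roughly speaking (illustrative): when a function uses
   __attribute__((target ("avx2"))) in a translation unit compiled without
   -mavx2, the new ISA bits arrive here and the builtins that def_builtin
   deferred for those bits are materialized, so the corresponding intrinsics
   become usable inside that function.  */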
29985
29986 /* Bits for builtin_description.flag. */
29987
29988 /* Set when we don't support the comparison natively, and should
29989 swap_comparison in order to support it. */
29990 #define BUILTIN_DESC_SWAP_OPERANDS 1
29991
29992 struct builtin_description
29993 {
29994 const HOST_WIDE_INT mask;
29995 const enum insn_code icode;
29996 const char *const name;
29997 const enum ix86_builtins code;
29998 const enum rtx_code comparison;
29999 const int flag;
30000 };
30001
30002 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30003 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30004 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30005 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30006 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30007 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30008 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30009 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30010 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30011 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30012 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30013 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30014 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30015 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30016 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30017 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30018 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30019 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30020 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30021 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30022 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30023 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30024 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30025 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30026 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30027 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30028 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30029 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30030 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30031 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30032 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30033 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30034 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30035 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30036 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30037 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30038 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30039 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30040 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30041 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30042 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30043 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30044 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30045 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30046 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30047 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30048 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30049 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30050 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30051 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30052 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30053 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30054
30055 #define BDESC(mask, icode, name, code, comparison, flag) \
30056 { mask, icode, name, code, comparison, flag },
30057 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
30058 static const struct builtin_description bdesc_##kind[] = \
30059 { \
30060 BDESC (mask, icode, name, code, comparison, flag)
30061 #define BDESC_END(kind, next_kind) \
30062 };
30063
30064 #include "i386-builtin.def"
30065
30066 #undef BDESC
30067 #undef BDESC_FIRST
30068 #undef BDESC_END
30069 \f
30070 /* TM vector builtins. */
30071
30072 /* Reuse the existing x86-specific `struct builtin_description' because
30073 we're lazy. Add casts to make them fit. */
30074 static const struct builtin_description bdesc_tm[] =
30075 {
30076 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30077 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30078 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30079 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30080 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30081 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30082 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30083
30084 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30085 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30086 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30087 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30088 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30089 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30090 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30091
30092 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30093 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30094 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30095 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30096 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30097 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30098 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30099
30100 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30101 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30102 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30103 };
30104
30105 /* Initialize the transactional memory vector load/store builtins. */
30106
30107 static void
30108 ix86_init_tm_builtins (void)
30109 {
30110 enum ix86_builtin_func_type ftype;
30111 const struct builtin_description *d;
30112 size_t i;
30113 tree decl;
30114 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30115 tree attrs_log, attrs_type_log;
30116
30117 if (!flag_tm)
30118 return;
30119
30120 /* If there are no builtins defined, we must be compiling in a
30121 language without trans-mem support. */
30122 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30123 return;
30124
30125 /* Use whatever attributes a normal TM load has. */
30126 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30127 attrs_load = DECL_ATTRIBUTES (decl);
30128 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30129 /* Use whatever attributes a normal TM store has. */
30130 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30131 attrs_store = DECL_ATTRIBUTES (decl);
30132 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30133 /* Use whatever attributes a normal TM log has. */
30134 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30135 attrs_log = DECL_ATTRIBUTES (decl);
30136 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30137
30138 for (i = 0, d = bdesc_tm;
30139 i < ARRAY_SIZE (bdesc_tm);
30140 i++, d++)
30141 {
30142 if ((d->mask & ix86_isa_flags) != 0
30143 || (lang_hooks.builtin_function
30144 == lang_hooks.builtin_function_ext_scope))
30145 {
30146 tree type, attrs, attrs_type;
30147 enum built_in_function code = (enum built_in_function) d->code;
30148
30149 ftype = (enum ix86_builtin_func_type) d->flag;
30150 type = ix86_get_builtin_func_type (ftype);
30151
30152 if (BUILTIN_TM_LOAD_P (code))
30153 {
30154 attrs = attrs_load;
30155 attrs_type = attrs_type_load;
30156 }
30157 else if (BUILTIN_TM_STORE_P (code))
30158 {
30159 attrs = attrs_store;
30160 attrs_type = attrs_type_store;
30161 }
30162 else
30163 {
30164 attrs = attrs_log;
30165 attrs_type = attrs_type_log;
30166 }
30167 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30168 /* The builtin without the prefix for
30169 calling it directly. */
30170 d->name + strlen ("__builtin_"),
30171 attrs);
30172 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30173 set the TYPE_ATTRIBUTES. */
30174 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30175
30176 set_builtin_decl (code, decl, false);
30177 }
30178 }
30179 }
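/* Note on the name handling above: d->name + strlen ("__builtin_") strips
   the "__builtin_" prefix, so an entry like "__builtin__ITM_WM64" is also
   registered under the plain name "_ITM_WM64", which is the name used when
   the builtin is emitted as a direct call (see the comment at the
   add_builtin_function call).  */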
30180
30181 /* Macros for verification of enum ix86_builtins order. */
30182 #define BDESC_VERIFY(x, y, z) \
30183 gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
30184 #define BDESC_VERIFYS(x, y, z) \
30185 STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
30186
30187 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
30188 IX86_BUILTIN__BDESC_COMI_LAST, 1);
30189 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
30190 IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
30191 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
30192 IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
30193 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
30194 IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
30195 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
30196 IX86_BUILTIN__BDESC_ARGS_LAST, 1);
30197 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
30198 IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
30199 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
30200 IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
30201 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
30202 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST, 1);
30203 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
30204 IX86_BUILTIN__BDESC_MPX_LAST, 1);
30205 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
30206 IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
30207 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
30208 IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
30209 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
30210 IX86_BUILTIN__BDESC_CET_LAST, 1);
30211 BDESC_VERIFYS (IX86_BUILTIN_MAX,
30212 IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
30213
30214 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30215 in the current target ISA, to allow the user to compile particular modules
30216 with target-specific options that differ from the command-line
30217 options. */
30218 static void
30219 ix86_init_mmx_sse_builtins (void)
30220 {
30221 const struct builtin_description * d;
30222 enum ix86_builtin_func_type ftype;
30223 size_t i;
30224
30225 /* Add all special builtins with variable number of operands. */
30226 for (i = 0, d = bdesc_special_args;
30227 i < ARRAY_SIZE (bdesc_special_args);
30228 i++, d++)
30229 {
30230 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
30231 if (d->name == 0)
30232 continue;
30233
30234 ftype = (enum ix86_builtin_func_type) d->flag;
30235 def_builtin (d->mask, d->name, ftype, d->code);
30236 }
30237 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
30238 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
30239 ARRAY_SIZE (bdesc_special_args) - 1);
30240
30241 /* Add all builtins with variable number of operands. */
30242 for (i = 0, d = bdesc_args;
30243 i < ARRAY_SIZE (bdesc_args);
30244 i++, d++)
30245 {
30246 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
30247 if (d->name == 0)
30248 continue;
30249
30250 ftype = (enum ix86_builtin_func_type) d->flag;
30251 def_builtin_const (d->mask, d->name, ftype, d->code);
30252 }
30253 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
30254 IX86_BUILTIN__BDESC_ARGS_FIRST,
30255 ARRAY_SIZE (bdesc_args) - 1);
30256
30257 /* Add all builtins with variable number of operands from the second (isa2) table. */
30258 for (i = 0, d = bdesc_args2;
30259 i < ARRAY_SIZE (bdesc_args2);
30260 i++, d++)
30261 {
30262 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS2_FIRST, i);
30263 if (d->name == 0)
30264 continue;
30265
30266 ftype = (enum ix86_builtin_func_type) d->flag;
30267 def_builtin_const2 (d->mask, d->name, ftype, d->code);
30268 }
30269 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_LAST,
30270 IX86_BUILTIN__BDESC_ARGS2_FIRST,
30271 ARRAY_SIZE (bdesc_args2) - 1);
30272
30273 for (i = 0, d = bdesc_special_args2;
30274 i < ARRAY_SIZE (bdesc_special_args2);
30275 i++, d++)
30276 {
30277 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST, i);
30278 if (d->name == 0)
30279 continue;
30280
30281 ftype = (enum ix86_builtin_func_type) d->flag;
30282 def_builtin2 (d->mask, d->name, ftype, d->code);
30283 }
30284 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST,
30285 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
30286 ARRAY_SIZE (bdesc_special_args2) - 1);
30287
30288 /* Add all builtins with rounding. */
30289 for (i = 0, d = bdesc_round_args;
30290 i < ARRAY_SIZE (bdesc_round_args);
30291 i++, d++)
30292 {
30293 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
30294 if (d->name == 0)
30295 continue;
30296
30297 ftype = (enum ix86_builtin_func_type) d->flag;
30298 def_builtin_const (d->mask, d->name, ftype, d->code);
30299 }
30300 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
30301 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
30302 ARRAY_SIZE (bdesc_round_args) - 1);
30303
30304 /* pcmpestr[im] insns. */
30305 for (i = 0, d = bdesc_pcmpestr;
30306 i < ARRAY_SIZE (bdesc_pcmpestr);
30307 i++, d++)
30308 {
30309 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
30310 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30311 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30312 else
30313 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30314 def_builtin_const (d->mask, d->name, ftype, d->code);
30315 }
30316 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
30317 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
30318 ARRAY_SIZE (bdesc_pcmpestr) - 1);
30319
30320 /* pcmpistr[im] insns. */
30321 for (i = 0, d = bdesc_pcmpistr;
30322 i < ARRAY_SIZE (bdesc_pcmpistr);
30323 i++, d++)
30324 {
30325 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
30326 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30327 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30328 else
30329 ftype = INT_FTYPE_V16QI_V16QI_INT;
30330 def_builtin_const (d->mask, d->name, ftype, d->code);
30331 }
30332 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
30333 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
30334 ARRAY_SIZE (bdesc_pcmpistr) - 1);
30335
30336 /* comi/ucomi insns. */
30337 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30338 {
30339 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
30340 if (d->mask == OPTION_MASK_ISA_SSE2)
30341 ftype = INT_FTYPE_V2DF_V2DF;
30342 else
30343 ftype = INT_FTYPE_V4SF_V4SF;
30344 def_builtin_const (d->mask, d->name, ftype, d->code);
30345 }
30346 BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
30347 IX86_BUILTIN__BDESC_COMI_FIRST,
30348 ARRAY_SIZE (bdesc_comi) - 1);
30349
30350 /* SSE */
30351 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30352 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30353 def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30354 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30355
30356 /* SSE or 3DNow!A */
30357 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30358 /* As it uses V4HImode, we have to require -mmmx too. */
30359 | OPTION_MASK_ISA_MMX,
30360 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30361 IX86_BUILTIN_MASKMOVQ);
30362
30363 /* SSE2 */
30364 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30365 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30366
30367 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30368 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30369 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30370 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30371
30372 /* SSE3. */
30373 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30374 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30375 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30376 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30377
30378 /* AES */
30379 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30380 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30381 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30382 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30383 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30384 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30385 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30386 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30387 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30388 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30389 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30390 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30391
30392 /* PCLMUL */
30393 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30394 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30395
30396 /* RDRND */
30397 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30398 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30399 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30400 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30401 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30402 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30403 IX86_BUILTIN_RDRAND64_STEP);
30404
30405 /* AVX2 */
30406 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30407 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30408 IX86_BUILTIN_GATHERSIV2DF);
30409
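/* Usage sketch (illustrative, not part of this file): this builtin is what
   the corresponding AVX2 gather intrinsic in <immintrin.h> is normally
   built on, e.g. roughly

       __m128d
       gather2 (double const *base, __m128i idx)
       {
         return _mm_i32gather_pd (base, idx, 8);
       }

   compiled with -mavx2; the intrinsic expands to
   __builtin_ia32_gathersiv2df with an all-ones mask and a scale of 8.  */
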
30410 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30411 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30412 IX86_BUILTIN_GATHERSIV4DF);
30413
30414 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30415 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30416 IX86_BUILTIN_GATHERDIV2DF);
30417
30418 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30419 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30420 IX86_BUILTIN_GATHERDIV4DF);
30421
30422 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30423 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30424 IX86_BUILTIN_GATHERSIV4SF);
30425
30426 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30427 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30428 IX86_BUILTIN_GATHERSIV8SF);
30429
30430 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30431 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30432 IX86_BUILTIN_GATHERDIV4SF);
30433
30434 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30435 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30436 IX86_BUILTIN_GATHERDIV8SF);
30437
30438 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30439 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30440 IX86_BUILTIN_GATHERSIV2DI);
30441
30442 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30443 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30444 IX86_BUILTIN_GATHERSIV4DI);
30445
30446 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30447 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30448 IX86_BUILTIN_GATHERDIV2DI);
30449
30450 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30451 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30452 IX86_BUILTIN_GATHERDIV4DI);
30453
30454 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30455 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30456 IX86_BUILTIN_GATHERSIV4SI);
30457
30458 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30459 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30460 IX86_BUILTIN_GATHERSIV8SI);
30461
30462 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30463 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30464 IX86_BUILTIN_GATHERDIV4SI);
30465
30466 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30467 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30468 IX86_BUILTIN_GATHERDIV8SI);
30469
30470 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30471 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30472 IX86_BUILTIN_GATHERALTSIV4DF);
30473
30474 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30475 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30476 IX86_BUILTIN_GATHERALTDIV8SF);
30477
30478 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30479 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30480 IX86_BUILTIN_GATHERALTSIV4DI);
30481
30482 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30483 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30484 IX86_BUILTIN_GATHERALTDIV8SI);
30485
30486 /* AVX512F */
30487 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30488 V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
30489 IX86_BUILTIN_GATHER3SIV16SF);
30490
30491 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30492 V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
30493 IX86_BUILTIN_GATHER3SIV8DF);
30494
30495 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30496 V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
30497 IX86_BUILTIN_GATHER3DIV16SF);
30498
30499 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30500 V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
30501 IX86_BUILTIN_GATHER3DIV8DF);
30502
30503 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30504 V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
30505 IX86_BUILTIN_GATHER3SIV16SI);
30506
30507 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30508 V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
30509 IX86_BUILTIN_GATHER3SIV8DI);
30510
30511 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30512 V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
30513 IX86_BUILTIN_GATHER3DIV16SI);
30514
30515 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30516 V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
30517 IX86_BUILTIN_GATHER3DIV8DI);
30518
30519 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30520 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30521 IX86_BUILTIN_GATHER3ALTSIV8DF);
30522
30523 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30524 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30525 IX86_BUILTIN_GATHER3ALTDIV16SF);
30526
30527 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30528 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30529 IX86_BUILTIN_GATHER3ALTSIV8DI);
30530
30531 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30532 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30533 IX86_BUILTIN_GATHER3ALTDIV16SI);
30534
30535 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30536 VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
30537 IX86_BUILTIN_SCATTERSIV16SF);
30538
30539 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30540 VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
30541 IX86_BUILTIN_SCATTERSIV8DF);
30542
30543 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30544 VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
30545 IX86_BUILTIN_SCATTERDIV16SF);
30546
30547 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30548 VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
30549 IX86_BUILTIN_SCATTERDIV8DF);
30550
30551 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30552 VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
30553 IX86_BUILTIN_SCATTERSIV16SI);
30554
30555 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30556 VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
30557 IX86_BUILTIN_SCATTERSIV8DI);
30558
30559 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30560 VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
30561 IX86_BUILTIN_SCATTERDIV16SI);
30562
30563 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30564 VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
30565 IX86_BUILTIN_SCATTERDIV8DI);
30566
30567 /* AVX512VL */
30568 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
30569 V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
30570 IX86_BUILTIN_GATHER3SIV2DF);
30571
30572 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
30573 V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
30574 IX86_BUILTIN_GATHER3SIV4DF);
30575
30576 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
30577 V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
30578 IX86_BUILTIN_GATHER3DIV2DF);
30579
30580 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
30581 V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
30582 IX86_BUILTIN_GATHER3DIV4DF);
30583
30584 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
30585 V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
30586 IX86_BUILTIN_GATHER3SIV4SF);
30587
30588 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
30589 V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
30590 IX86_BUILTIN_GATHER3SIV8SF);
30591
30592 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
30593 V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
30594 IX86_BUILTIN_GATHER3DIV4SF);
30595
30596 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
30597 V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
30598 IX86_BUILTIN_GATHER3DIV8SF);
30599
30600 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
30601 V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
30602 IX86_BUILTIN_GATHER3SIV2DI);
30603
30604 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
30605 V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
30606 IX86_BUILTIN_GATHER3SIV4DI);
30607
30608 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
30609 V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
30610 IX86_BUILTIN_GATHER3DIV2DI);
30611
30612 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
30613 V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
30614 IX86_BUILTIN_GATHER3DIV4DI);
30615
30616 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
30617 V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
30618 IX86_BUILTIN_GATHER3SIV4SI);
30619
30620 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
30621 V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
30622 IX86_BUILTIN_GATHER3SIV8SI);
30623
30624 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
30625 V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
30626 IX86_BUILTIN_GATHER3DIV4SI);
30627
30628 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
30629 V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
30630 IX86_BUILTIN_GATHER3DIV8SI);
30631
30632 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
30633 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
30634 IX86_BUILTIN_GATHER3ALTSIV4DF);
30635
30636 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
30637 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
30638 IX86_BUILTIN_GATHER3ALTDIV8SF);
30639
30640 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
30641 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
30642 IX86_BUILTIN_GATHER3ALTSIV4DI);
30643
30644 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
30645 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
30646 IX86_BUILTIN_GATHER3ALTDIV8SI);
30647
30648 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
30649 VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
30650 IX86_BUILTIN_SCATTERSIV8SF);
30651
30652 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
30653 VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
30654 IX86_BUILTIN_SCATTERSIV4SF);
30655
30656 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
30657 VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
30658 IX86_BUILTIN_SCATTERSIV4DF);
30659
30660 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
30661 VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
30662 IX86_BUILTIN_SCATTERSIV2DF);
30663
30664 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
30665 VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
30666 IX86_BUILTIN_SCATTERDIV8SF);
30667
30668 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
30669 VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
30670 IX86_BUILTIN_SCATTERDIV4SF);
30671
30672 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
30673 VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
30674 IX86_BUILTIN_SCATTERDIV4DF);
30675
30676 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
30677 VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
30678 IX86_BUILTIN_SCATTERDIV2DF);
30679
30680 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
30681 VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
30682 IX86_BUILTIN_SCATTERSIV8SI);
30683
30684 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
30685 VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
30686 IX86_BUILTIN_SCATTERSIV4SI);
30687
30688 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
30689 VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
30690 IX86_BUILTIN_SCATTERSIV4DI);
30691
30692 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
30693 VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
30694 IX86_BUILTIN_SCATTERSIV2DI);
30695
30696 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
30697 VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
30698 IX86_BUILTIN_SCATTERDIV8SI);
30699
30700 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
30701 VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
30702 IX86_BUILTIN_SCATTERDIV4SI);
30703
30704 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
30705 VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
30706 IX86_BUILTIN_SCATTERDIV4DI);
30707
30708 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
30709 VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
30710 IX86_BUILTIN_SCATTERDIV2DI);
30711 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df",
30712 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
30713 IX86_BUILTIN_SCATTERALTSIV8DF);
30714
30715 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf",
30716 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
30717 IX86_BUILTIN_SCATTERALTDIV16SF);
30718
30719 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di",
30720 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
30721 IX86_BUILTIN_SCATTERALTSIV8DI);
30722
30723 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si",
30724 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
30725 IX86_BUILTIN_SCATTERALTDIV16SI);
30726
30727 /* AVX512PF */
30728 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
30729 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
30730 IX86_BUILTIN_GATHERPFDPD);
30731 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
30732 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
30733 IX86_BUILTIN_GATHERPFDPS);
30734 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
30735 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30736 IX86_BUILTIN_GATHERPFQPD);
30737 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
30738 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30739 IX86_BUILTIN_GATHERPFQPS);
30740 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
30741 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
30742 IX86_BUILTIN_SCATTERPFDPD);
30743 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
30744 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
30745 IX86_BUILTIN_SCATTERPFDPS);
30746 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
30747 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30748 IX86_BUILTIN_SCATTERPFQPD);
30749 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
30750 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30751 IX86_BUILTIN_SCATTERPFQPS);
30752
30753 /* SHA */
30754 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
30755 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
30756 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
30757 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
30758 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
30759 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
30760 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
30761 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
30762 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
30763 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
30764 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
30765 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
30766 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
30767 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
30768
30769 /* RTM. */
30770 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
30771 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
30772
30773 /* MMX access to the vec_init patterns. */
30774 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
30775 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
30776
30777 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
30778 V4HI_FTYPE_HI_HI_HI_HI,
30779 IX86_BUILTIN_VEC_INIT_V4HI);
30780
30781 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
30782 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
30783 IX86_BUILTIN_VEC_INIT_V8QI);
30784
30785 /* Access to the vec_extract patterns. */
30786 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
30787 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
30788 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
30789 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
30790 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
30791 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
30792 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
30793 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
30794 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
30795 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
30796
30797 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30798 /* As it uses V4HImode, we have to require -mmmx too. */
30799 | OPTION_MASK_ISA_MMX,
30800 "__builtin_ia32_vec_ext_v4hi",
30801 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
30802
30803 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
30804 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
30805
30806 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
30807 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
30808
30809 /* Access to the vec_set patterns. */
30810 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
30811 "__builtin_ia32_vec_set_v2di",
30812 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
30813
30814 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
30815 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
30816
30817 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
30818 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
30819
30820 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
30821 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
30822
30823 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30824 /* As it uses V4HImode, we have to require -mmmx too. */
30825 | OPTION_MASK_ISA_MMX,
30826 "__builtin_ia32_vec_set_v4hi",
30827 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
30828
30829 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
30830 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
30831
30832 /* RDSEED */
30833 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
30834 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
30835 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
30836 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
30837 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
30838 "__builtin_ia32_rdseed_di_step",
30839 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
30840
30841 /* ADCX */
30842 def_builtin (0, "__builtin_ia32_addcarryx_u32",
30843 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
30844 def_builtin (OPTION_MASK_ISA_64BIT,
30845 "__builtin_ia32_addcarryx_u64",
30846 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30847 IX86_BUILTIN_ADDCARRYX64);
30848
30849 /* SBB */
30850 def_builtin (0, "__builtin_ia32_sbb_u32",
30851 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
30852 def_builtin (OPTION_MASK_ISA_64BIT,
30853 "__builtin_ia32_sbb_u64",
30854 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30855 IX86_BUILTIN_SBB64);
30856
30857 /* Read/write FLAGS. */
30858 def_builtin (0, "__builtin_ia32_readeflags_u32",
30859 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30860 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
30861 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30862 def_builtin (0, "__builtin_ia32_writeeflags_u32",
30863 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
30864 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
30865 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
30866
30867 /* CLFLUSHOPT. */
30868 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
30869 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
30870
30871 /* CLWB. */
30872 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
30873 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
30874
30875 /* MONITORX and MWAITX. */
30876 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
30877 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
30878 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
30879 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
30880
30881 /* CLZERO. */
30882 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
30883 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
30884
30885 /* Add FMA4 multi-arg instructions. */
30886 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
30887 {
30888 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
30889 if (d->name == 0)
30890 continue;
30891
30892 ftype = (enum ix86_builtin_func_type) d->flag;
30893 def_builtin_const (d->mask, d->name, ftype, d->code);
30894 }
30895 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
30896 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
30897 ARRAY_SIZE (bdesc_multi_arg) - 1);
30898
30899 /* Add CET intrinsics. */
30900 for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
30901 {
30902 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
30903 if (d->name == 0)
30904 continue;
30905
30906 ftype = (enum ix86_builtin_func_type) d->flag;
30907 def_builtin2 (d->mask, d->name, ftype, d->code);
30908 }
30909 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
30910 IX86_BUILTIN__BDESC_CET_FIRST,
30911 ARRAY_SIZE (bdesc_cet) - 1);
30912
30913 for (i = 0, d = bdesc_cet_rdssp;
30914 i < ARRAY_SIZE (bdesc_cet_rdssp);
30915 i++, d++)
30916 {
30917 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
30918 if (d->name == 0)
30919 continue;
30920
30921 ftype = (enum ix86_builtin_func_type) d->flag;
30922 def_builtin2 (d->mask, d->name, ftype, d->code);
30923 }
30924 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
30925 IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
30926 ARRAY_SIZE (bdesc_cet_rdssp) - 1);
30927 }
30928
30929 static void
30930 ix86_init_mpx_builtins ()
30931 {
30932 const struct builtin_description * d;
30933 enum ix86_builtin_func_type ftype;
30934 tree decl;
30935 size_t i;
30936
30937 for (i = 0, d = bdesc_mpx;
30938 i < ARRAY_SIZE (bdesc_mpx);
30939 i++, d++)
30940 {
30941 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);
30942 if (d->name == 0)
30943 continue;
30944
30945 ftype = (enum ix86_builtin_func_type) d->flag;
30946 decl = def_builtin2 (d->mask, d->name, ftype, d->code);
30947
30948 /* Without the leaf and nothrow flags, abnormal edges may
30949 follow calls to MPX builtins when setjmp is present in
30950 the function. Since we may have a lot of MPX builtin
30951 calls, this creates lots of useless edges and enormous
30952 PHI nodes. To avoid this we mark MPX builtins as leaf
30953 and nothrow. */
30954 if (decl)
30955 {
30956 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30957 NULL_TREE);
30958 TREE_NOTHROW (decl) = 1;
30959 }
30960 else
30961 {
30962 ix86_builtins_isa[(int)d->code].leaf_p = true;
30963 ix86_builtins_isa[(int)d->code].nothrow_p = true;
30964 }
30965 }
30966 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
30967 IX86_BUILTIN__BDESC_MPX_FIRST,
30968 ARRAY_SIZE (bdesc_mpx) - 1);
30969
30970 for (i = 0, d = bdesc_mpx_const;
30971 i < ARRAY_SIZE (bdesc_mpx_const);
30972 i++, d++)
30973 {
30974 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);
30975 if (d->name == 0)
30976 continue;
30977
30978 ftype = (enum ix86_builtin_func_type) d->flag;
30979 decl = def_builtin_const2 (d->mask, d->name, ftype, d->code);
30980
30981 if (decl)
30982 {
30983 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30984 NULL_TREE);
30985 TREE_NOTHROW (decl) = 1;
30986 }
30987 else
30988 {
30989 ix86_builtins_isa[(int)d->code].leaf_p = true;
30990 ix86_builtins_isa[(int)d->code].nothrow_p = true;
30991 }
30992 }
30993 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
30994 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
30995 ARRAY_SIZE (bdesc_mpx_const) - 1);
30996 }
30997 #undef BDESC_VERIFY
30998 #undef BDESC_VERIFYS
30999
31000 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31001 to return a pointer to VERSION_DECL if the outcome of the expression
31002 formed by PREDICATE_CHAIN is true. This function will be called during
31003 version dispatch to decide which function version to execute. It returns
31004 the basic block at the end, to which more conditions can be added. */
31005
31006 static basic_block
31007 add_condition_to_bb (tree function_decl, tree version_decl,
31008 tree predicate_chain, basic_block new_bb)
31009 {
31010 gimple *return_stmt;
31011 tree convert_expr, result_var;
31012 gimple *convert_stmt;
31013 gimple *call_cond_stmt;
31014 gimple *if_else_stmt;
31015
31016 basic_block bb1, bb2, bb3;
31017 edge e12, e23;
31018
31019 tree cond_var, and_expr_var = NULL_TREE;
31020 gimple_seq gseq;
31021
31022 tree predicate_decl, predicate_arg;
31023
31024 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31025
31026 gcc_assert (new_bb != NULL);
31027 gseq = bb_seq (new_bb);
31028
31029
31030 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31031 build_fold_addr_expr (version_decl));
31032 result_var = create_tmp_var (ptr_type_node);
31033 convert_stmt = gimple_build_assign (result_var, convert_expr);
31034 return_stmt = gimple_build_return (result_var);
31035
31036 if (predicate_chain == NULL_TREE)
31037 {
31038 gimple_seq_add_stmt (&gseq, convert_stmt);
31039 gimple_seq_add_stmt (&gseq, return_stmt);
31040 set_bb_seq (new_bb, gseq);
31041 gimple_set_bb (convert_stmt, new_bb);
31042 gimple_set_bb (return_stmt, new_bb);
31043 pop_cfun ();
31044 return new_bb;
31045 }
31046
31047 while (predicate_chain != NULL)
31048 {
31049 cond_var = create_tmp_var (integer_type_node);
31050 predicate_decl = TREE_PURPOSE (predicate_chain);
31051 predicate_arg = TREE_VALUE (predicate_chain);
31052 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31053 gimple_call_set_lhs (call_cond_stmt, cond_var);
31054
31055 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31056 gimple_set_bb (call_cond_stmt, new_bb);
31057 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31058
31059 predicate_chain = TREE_CHAIN (predicate_chain);
31060
31061 if (and_expr_var == NULL)
31062 and_expr_var = cond_var;
31063 else
31064 {
31065 gimple *assign_stmt;
31066 /* Use MIN_EXPR to check whether any predicate result is zero:
31067 and_expr_var = min_expr <cond_var, and_expr_var>. */
31068 assign_stmt = gimple_build_assign (and_expr_var,
31069 build2 (MIN_EXPR, integer_type_node,
31070 cond_var, and_expr_var));
31071
31072 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31073 gimple_set_bb (assign_stmt, new_bb);
31074 gimple_seq_add_stmt (&gseq, assign_stmt);
31075 }
31076 }
31077
31078 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31079 integer_zero_node,
31080 NULL_TREE, NULL_TREE);
31081 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31082 gimple_set_bb (if_else_stmt, new_bb);
31083 gimple_seq_add_stmt (&gseq, if_else_stmt);
31084
31085 gimple_seq_add_stmt (&gseq, convert_stmt);
31086 gimple_seq_add_stmt (&gseq, return_stmt);
31087 set_bb_seq (new_bb, gseq);
31088
31089 bb1 = new_bb;
31090 e12 = split_block (bb1, if_else_stmt);
31091 bb2 = e12->dest;
31092 e12->flags &= ~EDGE_FALLTHRU;
31093 e12->flags |= EDGE_TRUE_VALUE;
31094
31095 e23 = split_block (bb2, return_stmt);
31096
31097 gimple_set_bb (convert_stmt, bb2);
31098 gimple_set_bb (return_stmt, bb2);
31099
31100 bb3 = e23->dest;
31101 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31102
31103 remove_edge (e23);
31104 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31105
31106 pop_cfun ();
31107
31108 return bb3;
31109 }
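
/* Illustrative sketch (not part of this file): for a version guarded by
   two predicates, the code generated above behaves roughly like

     c1 = __builtin_cpu_is ("haswell");
     c2 = __builtin_cpu_supports ("avx2");
     if (MIN (c1, c2) > 0)
       return (void *) &the_haswell_avx2_version;

   i.e. the predicate results are combined with MIN_EXPR rather than a
   chain of short-circuit conditionals.  The names above are examples
   only.  */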
31110
31111 /* This parses the attribute arguments to target in DECL and determines
31112 the right builtin to use to match the platform specification.
31113 It returns the priority value for this version decl. If PREDICATE_LIST
31114 is not NULL, it stores the list of cpu features that need to be checked
31115 before dispatching this function. */
31116
31117 static unsigned int
31118 get_builtin_code_for_version (tree decl, tree *predicate_list)
31119 {
31120 tree attrs;
31121 struct cl_target_option cur_target;
31122 tree target_node;
31123 struct cl_target_option *new_target;
31124 const char *arg_str = NULL;
31125 const char *attrs_str = NULL;
31126 char *tok_str = NULL;
31127 char *token;
31128
31129 /* Priority of i386 features, greater value is higher priority. This is
31130 used to decide the order in which function dispatch must happen. For
31131 instance, a version specialized for SSE4.2 should be checked for dispatch
31132 before a version for SSE3, as SSE4.2 implies SSE3. */
31133 enum feature_priority
31134 {
31135 P_ZERO = 0,
31136 P_MMX,
31137 P_SSE,
31138 P_SSE2,
31139 P_SSE3,
31140 P_SSSE3,
31141 P_PROC_SSSE3,
31142 P_SSE4_A,
31143 P_PROC_SSE4_A,
31144 P_SSE4_1,
31145 P_SSE4_2,
31146 P_PROC_SSE4_2,
31147 P_POPCNT,
31148 P_AES,
31149 P_PCLMUL,
31150 P_AVX,
31151 P_PROC_AVX,
31152 P_BMI,
31153 P_PROC_BMI,
31154 P_FMA4,
31155 P_XOP,
31156 P_PROC_XOP,
31157 P_FMA,
31158 P_PROC_FMA,
31159 P_BMI2,
31160 P_AVX2,
31161 P_PROC_AVX2,
31162 P_AVX512F,
31163 P_PROC_AVX512F
31164 };
31165
31166 enum feature_priority priority = P_ZERO;
31167
31168 /* These are the target attribute strings for which a dispatcher is
31169 available, from fold_builtin_cpu. */
31170
31171 static struct _feature_list
31172 {
31173 const char *const name;
31174 const enum feature_priority priority;
31175 }
31176 const feature_list[] =
31177 {
31178 {"mmx", P_MMX},
31179 {"sse", P_SSE},
31180 {"sse2", P_SSE2},
31181 {"sse3", P_SSE3},
31182 {"sse4a", P_SSE4_A},
31183 {"ssse3", P_SSSE3},
31184 {"sse4.1", P_SSE4_1},
31185 {"sse4.2", P_SSE4_2},
31186 {"popcnt", P_POPCNT},
31187 {"aes", P_AES},
31188 {"pclmul", P_PCLMUL},
31189 {"avx", P_AVX},
31190 {"bmi", P_BMI},
31191 {"fma4", P_FMA4},
31192 {"xop", P_XOP},
31193 {"fma", P_FMA},
31194 {"bmi2", P_BMI2},
31195 {"avx2", P_AVX2},
31196 {"avx512f", P_AVX512F}
31197 };
31198
31199
31200 static unsigned int NUM_FEATURES
31201 = sizeof (feature_list) / sizeof (struct _feature_list);
31202
31203 unsigned int i;
31204
31205 tree predicate_chain = NULL_TREE;
31206 tree predicate_decl, predicate_arg;
31207
31208 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31209 gcc_assert (attrs != NULL);
31210
31211 attrs = TREE_VALUE (TREE_VALUE (attrs));
31212
31213 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31214 attrs_str = TREE_STRING_POINTER (attrs);
31215
31216 /* Return priority zero for default function. */
31217 if (strcmp (attrs_str, "default") == 0)
31218 return 0;
31219
31220 /* Handle arch= if specified. For priority, set it to be 1 more than
31221 the best instruction set the processor can handle. For instance, if
31222 there is a version for atom and a version for ssse3 (the highest ISA
31223 priority for atom), the atom version must be checked for dispatch
31224 before the ssse3 version. */
31225 if (strstr (attrs_str, "arch=") != NULL)
31226 {
31227 cl_target_option_save (&cur_target, &global_options);
31228 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31229 &global_options_set);
31230
31231 gcc_assert (target_node);
31232 new_target = TREE_TARGET_OPTION (target_node);
31233 gcc_assert (new_target);
31234
31235 if (new_target->arch_specified && new_target->arch > 0)
31236 {
31237 switch (new_target->arch)
31238 {
31239 case PROCESSOR_CORE2:
31240 arg_str = "core2";
31241 priority = P_PROC_SSSE3;
31242 break;
31243 case PROCESSOR_NEHALEM:
31244 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31245 {
31246 arg_str = "westmere";
31247 priority = P_AES;
31248 }
31249 else
31250 {
31251 /* We translate "arch=corei7" and "arch=nehalem" to
31252 "corei7" so that it will be mapped to M_INTEL_COREI7
31253 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31254 arg_str = "corei7";
31255 priority = P_PROC_SSE4_2;
31256 }
31257 break;
31258 case PROCESSOR_SANDYBRIDGE:
31259 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31260 arg_str = "ivybridge";
31261 else
31262 arg_str = "sandybridge";
31263 priority = P_PROC_AVX;
31264 break;
31265 case PROCESSOR_HASWELL:
31266 case PROCESSOR_SKYLAKE_AVX512:
31267 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VBMI)
31268 arg_str = "cannonlake";
31269 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
31270 arg_str = "skylake-avx512";
31271 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
31272 arg_str = "skylake";
31273 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31274 arg_str = "broadwell";
31275 else
31276 arg_str = "haswell";
31277 priority = P_PROC_AVX2;
31278 break;
31279 case PROCESSOR_BONNELL:
31280 arg_str = "bonnell";
31281 priority = P_PROC_SSSE3;
31282 break;
31283 case PROCESSOR_KNL:
31284 arg_str = "knl";
31285 priority = P_PROC_AVX512F;
31286 break;
31287 case PROCESSOR_KNM:
31288 arg_str = "knm";
31289 priority = P_PROC_AVX512F;
31290 break;
31291 case PROCESSOR_SILVERMONT:
31292 arg_str = "silvermont";
31293 priority = P_PROC_SSE4_2;
31294 break;
31295 case PROCESSOR_AMDFAM10:
31296 arg_str = "amdfam10h";
31297 priority = P_PROC_SSE4_A;
31298 break;
31299 case PROCESSOR_BTVER1:
31300 arg_str = "btver1";
31301 priority = P_PROC_SSE4_A;
31302 break;
31303 case PROCESSOR_BTVER2:
31304 arg_str = "btver2";
31305 priority = P_PROC_BMI;
31306 break;
31307 case PROCESSOR_BDVER1:
31308 arg_str = "bdver1";
31309 priority = P_PROC_XOP;
31310 break;
31311 case PROCESSOR_BDVER2:
31312 arg_str = "bdver2";
31313 priority = P_PROC_FMA;
31314 break;
31315 case PROCESSOR_BDVER3:
31316 arg_str = "bdver3";
31317 priority = P_PROC_FMA;
31318 break;
31319 case PROCESSOR_BDVER4:
31320 arg_str = "bdver4";
31321 priority = P_PROC_AVX2;
31322 break;
31323 case PROCESSOR_ZNVER1:
31324 arg_str = "znver1";
31325 priority = P_PROC_AVX2;
31326 break;
31327 }
31328 }
31329
31330 cl_target_option_restore (&global_options, &cur_target);
31331
31332 if (predicate_list && arg_str == NULL)
31333 {
31334 error_at (DECL_SOURCE_LOCATION (decl),
31335 "No dispatcher found for the versioning attributes");
31336 return 0;
31337 }
31338
31339 if (predicate_list)
31340 {
31341 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31342 /* For a C string literal the length includes the trailing NULL. */
31343 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31344 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31345 predicate_chain);
31346 }
31347 }
31348
31349 /* Process feature name. */
31350 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31351 strcpy (tok_str, attrs_str);
31352 token = strtok (tok_str, ",");
31353 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31354
31355 while (token != NULL)
31356 {
31357 /* Do not process "arch=" */
31358 if (strncmp (token, "arch=", 5) == 0)
31359 {
31360 token = strtok (NULL, ",");
31361 continue;
31362 }
31363 for (i = 0; i < NUM_FEATURES; ++i)
31364 {
31365 if (strcmp (token, feature_list[i].name) == 0)
31366 {
31367 if (predicate_list)
31368 {
31369 predicate_arg = build_string_literal (
31370 strlen (feature_list[i].name) + 1,
31371 feature_list[i].name);
31372 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31373 predicate_chain);
31374 }
31375 /* Find the maximum priority feature. */
31376 if (feature_list[i].priority > priority)
31377 priority = feature_list[i].priority;
31378
31379 break;
31380 }
31381 }
31382 if (predicate_list && i == NUM_FEATURES)
31383 {
31384 error_at (DECL_SOURCE_LOCATION (decl),
31385 "No dispatcher found for %s", token);
31386 return 0;
31387 }
31388 token = strtok (NULL, ",");
31389 }
31390 free (tok_str);
31391
31392 if (predicate_list && predicate_chain == NULL_TREE)
31393 {
31394 error_at (DECL_SOURCE_LOCATION (decl),
31395 "No dispatcher found for the versioning attributes : %s",
31396 attrs_str);
31397 return 0;
31398 }
31399 else if (predicate_list)
31400 {
31401 predicate_chain = nreverse (predicate_chain);
31402 *predicate_list = predicate_chain;
31403 }
31404
31405 return priority;
31406 }
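
/* For example (hypothetical attribute, for illustration only): a version
   declared with __attribute__ ((target ("arch=haswell,avx2"))) produces a
   predicate chain containing __builtin_cpu_is ("haswell") and
   __builtin_cpu_supports ("avx2"), and the returned priority is
   P_PROC_AVX2, the highest priority among the pieces involved.  */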
31407
31408 /* This compares the priority of target features in function DECL1
31409 and DECL2. It returns positive value if DECL1 is higher priority,
31410 negative value if DECL2 is higher priority and 0 if they are the
31411 same. */
31412
31413 static int
31414 ix86_compare_version_priority (tree decl1, tree decl2)
31415 {
31416 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31417 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31418
31419 return (int)priority1 - (int)priority2;
31420 }
31421
31422 /* V1 and V2 point to function versions with different priorities
31423 based on the target ISA. This function compares their priorities. */
31424
31425 static int
31426 feature_compare (const void *v1, const void *v2)
31427 {
31428 typedef struct _function_version_info
31429 {
31430 tree version_decl;
31431 tree predicate_chain;
31432 unsigned int dispatch_priority;
31433 } function_version_info;
31434
31435 const function_version_info c1 = *(const function_version_info *)v1;
31436 const function_version_info c2 = *(const function_version_info *)v2;
31437 return (c2.dispatch_priority - c1.dispatch_priority);
31438 }
31439
31440 /* This function generates the dispatch function for
31441 multi-versioned functions. DISPATCH_DECL is the function which will
31442 contain the dispatch logic. FNDECLS are the function choices for
31443 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31444 in DISPATCH_DECL in which the dispatch code is generated. */
31445
31446 static int
31447 dispatch_function_versions (tree dispatch_decl,
31448 void *fndecls_p,
31449 basic_block *empty_bb)
31450 {
31451 tree default_decl;
31452 gimple *ifunc_cpu_init_stmt;
31453 gimple_seq gseq;
31454 int ix;
31455 tree ele;
31456 vec<tree> *fndecls;
31457 unsigned int num_versions = 0;
31458 unsigned int actual_versions = 0;
31459 unsigned int i;
31460
31461 struct _function_version_info
31462 {
31463 tree version_decl;
31464 tree predicate_chain;
31465 unsigned int dispatch_priority;
31466 }*function_version_info;
31467
31468 gcc_assert (dispatch_decl != NULL
31469 && fndecls_p != NULL
31470 && empty_bb != NULL);
31471
31472 /* fndecls_p is actually a vector. */
31473 fndecls = static_cast<vec<tree> *> (fndecls_p);
31474
31475 /* At least one more version other than the default. */
31476 num_versions = fndecls->length ();
31477 gcc_assert (num_versions >= 2);
31478
31479 function_version_info = (struct _function_version_info *)
31480 XNEWVEC (struct _function_version_info, (num_versions - 1));
31481
31482 /* The first version in the vector is the default decl. */
31483 default_decl = (*fndecls)[0];
31484
31485 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31486
31487 gseq = bb_seq (*empty_bb);
31488 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31489 constructors, so explicitly call __builtin_cpu_init here. */
31490 ifunc_cpu_init_stmt = gimple_build_call_vec (
31491 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31492 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31493 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31494 set_bb_seq (*empty_bb, gseq);
31495
31496 pop_cfun ();
31497
31498
31499 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31500 {
31501 tree version_decl = ele;
31502 tree predicate_chain = NULL_TREE;
31503 unsigned int priority;
31504 /* Get attribute string, parse it and find the right predicate decl.
31505 The predicate function could be a lengthy combination of many
31506 features, like arch-type and various isa-variants. */
31507 priority = get_builtin_code_for_version (version_decl,
31508 &predicate_chain);
31509
31510 if (predicate_chain == NULL_TREE)
31511 continue;
31512
31513 function_version_info [actual_versions].version_decl = version_decl;
31514 function_version_info [actual_versions].predicate_chain
31515 = predicate_chain;
31516 function_version_info [actual_versions].dispatch_priority = priority;
31517 actual_versions++;
31518 }
31519
31520 /* Sort the versions according to descending order of dispatch priority. The
31521 priority is based on the ISA. This is not a perfect solution. There
31522 could still be ambiguity. If more than one function version is suitable
31523 to execute, which one should be dispatched? In future, allow the user
31524 to specify a dispatch priority next to the version. */
31525 qsort (function_version_info, actual_versions,
31526 sizeof (struct _function_version_info), feature_compare);
31527
31528 for (i = 0; i < actual_versions; ++i)
31529 *empty_bb = add_condition_to_bb (dispatch_decl,
31530 function_version_info[i].version_decl,
31531 function_version_info[i].predicate_chain,
31532 *empty_bb);
31533
31534 /* Dispatch the default version at the end. */
31535 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31536 NULL, *empty_bb);
31537
31538 free (function_version_info);
31539 return 0;
31540 }
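
/* The resolver body assembled above is roughly equivalent to
   (illustrative pseudo-C; the real body is GIMPLE):

     void *resolver (void)
     {
       __builtin_cpu_init ();
       if (<predicates of the highest-priority version>)
         return <that version>;
       ...
       return <default version>;
     }
*/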
31541
31542 /* This function changes the assembler name for functions that are
31543 versions. If DECL is a function version and has a "target"
31544 attribute, it appends the attribute string to its assembler name. */
31545
31546 static tree
31547 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31548 {
31549 tree version_attr;
31550 const char *orig_name, *version_string;
31551 char *attr_str, *assembler_name;
31552
31553 if (DECL_DECLARED_INLINE_P (decl)
31554 && lookup_attribute ("gnu_inline",
31555 DECL_ATTRIBUTES (decl)))
31556 error_at (DECL_SOURCE_LOCATION (decl),
31557 "Function versions cannot be marked as gnu_inline,"
31558 " bodies have to be generated");
31559
31560 if (DECL_VIRTUAL_P (decl)
31561 || DECL_VINDEX (decl))
31562 sorry ("Virtual function multiversioning not supported");
31563
31564 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31565
31566 /* target attribute string cannot be NULL. */
31567 gcc_assert (version_attr != NULL_TREE);
31568
31569 orig_name = IDENTIFIER_POINTER (id);
31570 version_string
31571 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31572
31573 if (strcmp (version_string, "default") == 0)
31574 return id;
31575
31576 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31577 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31578
31579 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31580
31581 /* Allow assembler name to be modified if already set. */
31582 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31583 SET_DECL_RTL (decl, NULL);
31584
31585 tree ret = get_identifier (assembler_name);
31586 XDELETEVEC (attr_str);
31587 XDELETEVEC (assembler_name);
31588 return ret;
31589 }
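
/* E.g. (illustrative): a version of foo declared with
   __attribute__ ((target ("avx2"))) gets the assembler name "foo.avx2",
   while the "default" version keeps the plain name "foo".  */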
31590
31591
31592 static tree
31593 ix86_mangle_decl_assembler_name (tree decl, tree id)
31594 {
31595 /* For function version, add the target suffix to the assembler name. */
31596 if (TREE_CODE (decl) == FUNCTION_DECL
31597 && DECL_FUNCTION_VERSIONED (decl))
31598 id = ix86_mangle_function_version_assembler_name (decl, id);
31599 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31600 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31601 #endif
31602
31603 return id;
31604 }
31605
31606 /* Make a dispatcher declaration for the multi-versioned function DECL.
31607 Calls to DECL function will be replaced with calls to the dispatcher
31608 by the front-end. Returns the decl of the dispatcher function. */
31609
31610 static tree
31611 ix86_get_function_versions_dispatcher (void *decl)
31612 {
31613 tree fn = (tree) decl;
31614 struct cgraph_node *node = NULL;
31615 struct cgraph_node *default_node = NULL;
31616 struct cgraph_function_version_info *node_v = NULL;
31617 struct cgraph_function_version_info *first_v = NULL;
31618
31619 tree dispatch_decl = NULL;
31620
31621 struct cgraph_function_version_info *default_version_info = NULL;
31622
31623 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31624
31625 node = cgraph_node::get (fn);
31626 gcc_assert (node != NULL);
31627
31628 node_v = node->function_version ();
31629 gcc_assert (node_v != NULL);
31630
31631 if (node_v->dispatcher_resolver != NULL)
31632 return node_v->dispatcher_resolver;
31633
31634 /* Find the default version and make it the first node. */
31635 first_v = node_v;
31636 /* Go to the beginning of the chain. */
31637 while (first_v->prev != NULL)
31638 first_v = first_v->prev;
31639 default_version_info = first_v;
31640 while (default_version_info != NULL)
31641 {
31642 if (is_function_default_version
31643 (default_version_info->this_node->decl))
31644 break;
31645 default_version_info = default_version_info->next;
31646 }
31647
31648 /* If there is no default node, just return NULL. */
31649 if (default_version_info == NULL)
31650 return NULL;
31651
31652 /* Make default info the first node. */
31653 if (first_v != default_version_info)
31654 {
31655 default_version_info->prev->next = default_version_info->next;
31656 if (default_version_info->next)
31657 default_version_info->next->prev = default_version_info->prev;
31658 first_v->prev = default_version_info;
31659 default_version_info->next = first_v;
31660 default_version_info->prev = NULL;
31661 }
31662
31663 default_node = default_version_info->this_node;
31664
31665 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31666 if (targetm.has_ifunc_p ())
31667 {
31668 struct cgraph_function_version_info *it_v = NULL;
31669 struct cgraph_node *dispatcher_node = NULL;
31670 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31671
31672 /* Right now, the dispatching is done via ifunc. */
31673 dispatch_decl = make_dispatcher_decl (default_node->decl);
31674
31675 dispatcher_node = cgraph_node::get_create (dispatch_decl);
31676 gcc_assert (dispatcher_node != NULL);
31677 dispatcher_node->dispatcher_function = 1;
31678 dispatcher_version_info
31679 = dispatcher_node->insert_new_function_version ();
31680 dispatcher_version_info->next = default_version_info;
31681 dispatcher_node->definition = 1;
31682
31683 /* Set the dispatcher for all the versions. */
31684 it_v = default_version_info;
31685 while (it_v != NULL)
31686 {
31687 it_v->dispatcher_resolver = dispatch_decl;
31688 it_v = it_v->next;
31689 }
31690 }
31691 else
31692 #endif
31693 {
31694 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31695 "multiversioning needs ifunc which is not supported "
31696 "on this target");
31697 }
31698
31699 return dispatch_decl;
31700 }
31701
31702 /* Make the resolver function decl to dispatch the versions of
31703 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
31704 ifunc alias that will point to the created resolver. Create an
31705 empty basic block in the resolver and store the pointer in
31706 EMPTY_BB. Return the decl of the resolver function. */
31707
31708 static tree
31709 make_resolver_func (const tree default_decl,
31710 const tree ifunc_alias_decl,
31711 basic_block *empty_bb)
31712 {
31713 char *resolver_name;
31714 tree decl, type, decl_name, t;
31715
31716 /* IFUNCs have to be globally visible. So, if the default_decl is
31717 not, then the name of the IFUNC should be made unique. */
31718 if (TREE_PUBLIC (default_decl) == 0)
31719 {
31720 char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
31721 symtab->change_decl_assembler_name (ifunc_alias_decl,
31722 get_identifier (ifunc_name));
31723 XDELETEVEC (ifunc_name);
31724 }
31725
31726 resolver_name = make_unique_name (default_decl, "resolver", false);
31727
31728 /* The resolver function should return a (void *). */
31729 type = build_function_type_list (ptr_type_node, NULL_TREE);
31730
31731 decl = build_fn_decl (resolver_name, type);
31732 decl_name = get_identifier (resolver_name);
31733 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31734
31735 DECL_NAME (decl) = decl_name;
31736 TREE_USED (decl) = 1;
31737 DECL_ARTIFICIAL (decl) = 1;
31738 DECL_IGNORED_P (decl) = 1;
31739 TREE_PUBLIC (decl) = 0;
31740 DECL_UNINLINABLE (decl) = 1;
31741
31742 /* Resolver is not external, body is generated. */
31743 DECL_EXTERNAL (decl) = 0;
31744 DECL_EXTERNAL (ifunc_alias_decl) = 0;
31745
31746 DECL_CONTEXT (decl) = NULL_TREE;
31747 DECL_INITIAL (decl) = make_node (BLOCK);
31748 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31749
31750 if (DECL_COMDAT_GROUP (default_decl)
31751 || TREE_PUBLIC (default_decl))
31752 {
31753 /* In this case, each translation unit with a call to this
31754 versioned function will put out a resolver. Ensure it
31755 is comdat to keep just one copy. */
31756 DECL_COMDAT (decl) = 1;
31757 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
31758 }
31759 /* Build result decl and add to function_decl. */
31760 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31761 DECL_ARTIFICIAL (t) = 1;
31762 DECL_IGNORED_P (t) = 1;
31763 DECL_RESULT (decl) = t;
31764
31765 gimplify_function_tree (decl);
31766 push_cfun (DECL_STRUCT_FUNCTION (decl));
31767 *empty_bb = init_lowered_empty_function (decl, false,
31768 profile_count::uninitialized ());
31769
31770 cgraph_node::add_new_function (decl, true);
31771 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
31772
31773 pop_cfun ();
31774
31775 gcc_assert (ifunc_alias_decl != NULL);
31776 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
31777 DECL_ATTRIBUTES (ifunc_alias_decl)
31778 = make_attribute ("ifunc", resolver_name,
31779 DECL_ATTRIBUTES (ifunc_alias_decl));
31780
31781 /* Create the alias for dispatch to resolver here. */
31782 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
31783 XDELETEVEC (resolver_name);
31784 return decl;
31785 }
31786
31787 /* Generate the dispatching code body to dispatch multi-versioned function
31788 DECL. The target hook is called to process the "target" attributes and
31789 provide the code to dispatch the right function at run-time. NODE points
31790 to the dispatcher decl whose body will be created. */
31791
31792 static tree
31793 ix86_generate_version_dispatcher_body (void *node_p)
31794 {
31795 tree resolver_decl;
31796 basic_block empty_bb;
31797 tree default_ver_decl;
31798 struct cgraph_node *versn;
31799 struct cgraph_node *node;
31800
31801 struct cgraph_function_version_info *node_version_info = NULL;
31802 struct cgraph_function_version_info *versn_info = NULL;
31803
31804 node = (cgraph_node *)node_p;
31805
31806 node_version_info = node->function_version ();
31807 gcc_assert (node->dispatcher_function
31808 && node_version_info != NULL);
31809
31810 if (node_version_info->dispatcher_resolver)
31811 return node_version_info->dispatcher_resolver;
31812
31813 /* The first version in the chain corresponds to the default version. */
31814 default_ver_decl = node_version_info->next->this_node->decl;
31815
31816 /* node is going to be an alias, so remove the finalized bit. */
31817 node->definition = false;
31818
31819 resolver_decl = make_resolver_func (default_ver_decl,
31820 node->decl, &empty_bb);
31821
31822 node_version_info->dispatcher_resolver = resolver_decl;
31823
31824 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
31825
31826 auto_vec<tree, 2> fn_ver_vec;
31827
31828 for (versn_info = node_version_info->next; versn_info;
31829 versn_info = versn_info->next)
31830 {
31831 versn = versn_info->this_node;
31832 /* Check for virtual functions here again, as by this time it should
31833 have been determined if this function needs a vtable index or
31834 not. This happens for methods in derived classes that override
31835 virtual methods in base classes but are not explicitly marked as
31836 virtual. */
31837 if (DECL_VINDEX (versn->decl))
31838 sorry ("Virtual function multiversioning not supported");
31839
31840 fn_ver_vec.safe_push (versn->decl);
31841 }
31842
31843 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
31844 cgraph_edge::rebuild_edges ();
31845 pop_cfun ();
31846 return resolver_decl;
31847 }
31848 /* This builds the processor_model struct type defined in
31849 libgcc/config/i386/cpuinfo.c. */
31850
31851 static tree
31852 build_processor_model_struct (void)
31853 {
31854 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
31855 "__cpu_features"};
31856 tree field = NULL_TREE, field_chain = NULL_TREE;
31857 int i;
31858 tree type = make_node (RECORD_TYPE);
31859
31860 /* The first 3 fields are unsigned int. */
31861 for (i = 0; i < 3; ++i)
31862 {
31863 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
31864 get_identifier (field_name[i]), unsigned_type_node);
31865 if (field_chain != NULL_TREE)
31866 DECL_CHAIN (field) = field_chain;
31867 field_chain = field;
31868 }
31869
31870 /* The last field is an array of unsigned integers of size one. */
31871 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
31872 get_identifier (field_name[3]),
31873 build_array_type (unsigned_type_node,
31874 build_index_type (size_one_node)));
31875 if (field_chain != NULL_TREE)
31876 DECL_CHAIN (field) = field_chain;
31877 field_chain = field;
31878
31879 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
31880 return type;
31881 }
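
/* Roughly the C equivalent of the record built above (illustrative
   sketch; the authoritative definition lives in libgcc):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/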
31882
31883 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
31884
31885 static tree
31886 make_var_decl (tree type, const char *name)
31887 {
31888 tree new_decl;
31889
31890 new_decl = build_decl (UNKNOWN_LOCATION,
31891 VAR_DECL,
31892 get_identifier (name),
31893 type);
31894
31895 DECL_EXTERNAL (new_decl) = 1;
31896 TREE_STATIC (new_decl) = 1;
31897 TREE_PUBLIC (new_decl) = 1;
31898 DECL_INITIAL (new_decl) = 0;
31899 DECL_ARTIFICIAL (new_decl) = 0;
31900 DECL_PRESERVE_P (new_decl) = 1;
31901
31902 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
31903 assemble_variable (new_decl, 0, 0, 0);
31904
31905 return new_decl;
31906 }
31907
31908 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
31909 into an integer defined in libgcc/config/i386/cpuinfo.c */
31910
31911 static tree
31912 fold_builtin_cpu (tree fndecl, tree *args)
31913 {
31914 unsigned int i;
31915 enum ix86_builtins fn_code = (enum ix86_builtins)
31916 DECL_FUNCTION_CODE (fndecl);
31917 tree param_string_cst = NULL;
31918
31919 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
31920 enum processor_features
31921 {
31922 F_CMOV = 0,
31923 F_MMX,
31924 F_POPCNT,
31925 F_SSE,
31926 F_SSE2,
31927 F_SSE3,
31928 F_SSSE3,
31929 F_SSE4_1,
31930 F_SSE4_2,
31931 F_AVX,
31932 F_AVX2,
31933 F_SSE4_A,
31934 F_FMA4,
31935 F_XOP,
31936 F_FMA,
31937 F_AVX512F,
31938 F_BMI,
31939 F_BMI2,
31940 F_AES,
31941 F_PCLMUL,
31942 F_AVX512VL,
31943 F_AVX512BW,
31944 F_AVX512DQ,
31945 F_AVX512CD,
31946 F_AVX512ER,
31947 F_AVX512PF,
31948 F_AVX512VBMI,
31949 F_AVX512IFMA,
31950 F_AVX5124VNNIW,
31951 F_AVX5124FMAPS,
31952 F_AVX512VPOPCNTDQ,
31953 F_MAX
31954 };
31955
31956 /* These are the values for vendor types and CPU types and subtypes
31957 in cpuinfo.c. CPU types and subtypes have the corresponding
31958 start value subtracted from them. */
31959 enum processor_model
31960 {
31961 M_INTEL = 1,
31962 M_AMD,
31963 M_CPU_TYPE_START,
31964 M_INTEL_BONNELL,
31965 M_INTEL_CORE2,
31966 M_INTEL_COREI7,
31967 M_AMDFAM10H,
31968 M_AMDFAM15H,
31969 M_INTEL_SILVERMONT,
31970 M_INTEL_KNL,
31971 M_AMD_BTVER1,
31972 M_AMD_BTVER2,
31973 M_AMDFAM17H,
31974 M_INTEL_KNM,
31975 M_CPU_SUBTYPE_START,
31976 M_INTEL_COREI7_NEHALEM,
31977 M_INTEL_COREI7_WESTMERE,
31978 M_INTEL_COREI7_SANDYBRIDGE,
31979 M_AMDFAM10H_BARCELONA,
31980 M_AMDFAM10H_SHANGHAI,
31981 M_AMDFAM10H_ISTANBUL,
31982 M_AMDFAM15H_BDVER1,
31983 M_AMDFAM15H_BDVER2,
31984 M_AMDFAM15H_BDVER3,
31985 M_AMDFAM15H_BDVER4,
31986 M_AMDFAM17H_ZNVER1,
31987 M_INTEL_COREI7_IVYBRIDGE,
31988 M_INTEL_COREI7_HASWELL,
31989 M_INTEL_COREI7_BROADWELL,
31990 M_INTEL_COREI7_SKYLAKE,
31991 M_INTEL_COREI7_SKYLAKE_AVX512,
31992 M_INTEL_COREI7_CANNONLAKE
31993 };
31994
31995 static struct _arch_names_table
31996 {
31997 const char *const name;
31998 const enum processor_model model;
31999 }
32000 const arch_names_table[] =
32001 {
32002 {"amd", M_AMD},
32003 {"intel", M_INTEL},
32004 {"atom", M_INTEL_BONNELL},
32005 {"slm", M_INTEL_SILVERMONT},
32006 {"core2", M_INTEL_CORE2},
32007 {"corei7", M_INTEL_COREI7},
32008 {"nehalem", M_INTEL_COREI7_NEHALEM},
32009 {"westmere", M_INTEL_COREI7_WESTMERE},
32010 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32011 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32012 {"haswell", M_INTEL_COREI7_HASWELL},
32013 {"broadwell", M_INTEL_COREI7_BROADWELL},
32014 {"skylake", M_INTEL_COREI7_SKYLAKE},
32015 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
32016 {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
32017 {"bonnell", M_INTEL_BONNELL},
32018 {"silvermont", M_INTEL_SILVERMONT},
32019 {"knl", M_INTEL_KNL},
32020 {"knm", M_INTEL_KNM},
32021 {"amdfam10h", M_AMDFAM10H},
32022 {"barcelona", M_AMDFAM10H_BARCELONA},
32023 {"shanghai", M_AMDFAM10H_SHANGHAI},
32024 {"istanbul", M_AMDFAM10H_ISTANBUL},
32025 {"btver1", M_AMD_BTVER1},
32026 {"amdfam15h", M_AMDFAM15H},
32027 {"bdver1", M_AMDFAM15H_BDVER1},
32028 {"bdver2", M_AMDFAM15H_BDVER2},
32029 {"bdver3", M_AMDFAM15H_BDVER3},
32030 {"bdver4", M_AMDFAM15H_BDVER4},
32031 {"btver2", M_AMD_BTVER2},
32032 {"amdfam17h", M_AMDFAM17H},
32033 {"znver1", M_AMDFAM17H_ZNVER1},
32034 };
32035
32036 static struct _isa_names_table
32037 {
32038 const char *const name;
32039 const enum processor_features feature;
32040 }
32041 const isa_names_table[] =
32042 {
32043 {"cmov", F_CMOV},
32044 {"mmx", F_MMX},
32045 {"popcnt", F_POPCNT},
32046 {"sse", F_SSE},
32047 {"sse2", F_SSE2},
32048 {"sse3", F_SSE3},
32049 {"ssse3", F_SSSE3},
32050 {"sse4a", F_SSE4_A},
32051 {"sse4.1", F_SSE4_1},
32052 {"sse4.2", F_SSE4_2},
32053 {"avx", F_AVX},
32054 {"fma4", F_FMA4},
32055 {"xop", F_XOP},
32056 {"fma", F_FMA},
32057 {"avx2", F_AVX2},
32058 {"avx512f", F_AVX512F},
32059 {"bmi", F_BMI},
32060 {"bmi2", F_BMI2},
32061 {"aes", F_AES},
32062 {"pclmul", F_PCLMUL},
32063 {"avx512vl",F_AVX512VL},
32064 {"avx512bw",F_AVX512BW},
32065 {"avx512dq",F_AVX512DQ},
32066 {"avx512cd",F_AVX512CD},
32067 {"avx512er",F_AVX512ER},
32068 {"avx512pf",F_AVX512PF},
32069 {"avx512vbmi",F_AVX512VBMI},
32070 {"avx512ifma",F_AVX512IFMA},
32071 {"avx5124vnniw",F_AVX5124VNNIW},
32072 {"avx5124fmaps",F_AVX5124FMAPS},
32073 {"avx512vpopcntdq",F_AVX512VPOPCNTDQ}
32074 };
32075
32076 tree __processor_model_type = build_processor_model_struct ();
32077 tree __cpu_model_var = make_var_decl (__processor_model_type,
32078 "__cpu_model");
32079
32080
32081 varpool_node::add (__cpu_model_var);
32082
32083 gcc_assert ((args != NULL) && (*args != NULL));
32084
32085 param_string_cst = *args;
32086 while (param_string_cst
32087 && TREE_CODE (param_string_cst) != STRING_CST)
32088 {
32089 /* *args must be an expr that can contain other EXPRS leading to a
32090 STRING_CST. */
32091 if (!EXPR_P (param_string_cst))
32092 {
32093 error ("Parameter to builtin must be a string constant or literal");
32094 return integer_zero_node;
32095 }
32096 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32097 }
32098
32099 gcc_assert (param_string_cst);
32100
32101 if (fn_code == IX86_BUILTIN_CPU_IS)
32102 {
32103 tree ref;
32104 tree field;
32105 tree final;
32106
32107 unsigned int field_val = 0;
32108 unsigned int NUM_ARCH_NAMES
32109 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32110
32111 for (i = 0; i < NUM_ARCH_NAMES; i++)
32112 if (strcmp (arch_names_table[i].name,
32113 TREE_STRING_POINTER (param_string_cst)) == 0)
32114 break;
32115
32116 if (i == NUM_ARCH_NAMES)
32117 {
32118 error ("Parameter to builtin not valid: %s",
32119 TREE_STRING_POINTER (param_string_cst));
32120 return integer_zero_node;
32121 }
32122
32123 field = TYPE_FIELDS (__processor_model_type);
32124 field_val = arch_names_table[i].model;
32125
32126 /* CPU types are stored in the next field. */
32127 if (field_val > M_CPU_TYPE_START
32128 && field_val < M_CPU_SUBTYPE_START)
32129 {
32130 field = DECL_CHAIN (field);
32131 field_val -= M_CPU_TYPE_START;
32132 }
32133
32134 /* CPU subtypes are stored in the next field. */
32135 if (field_val > M_CPU_SUBTYPE_START)
32136 {
32137 field = DECL_CHAIN (DECL_CHAIN (field));
32138 field_val -= M_CPU_SUBTYPE_START;
32139 }
32140
32141 /* Get the appropriate field in __cpu_model. */
32142 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32143 field, NULL_TREE);
32144
32145 /* Check the value. */
32146 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32147 build_int_cstu (unsigned_type_node, field_val));
32148 return build1 (CONVERT_EXPR, integer_type_node, final);
32149 }
32150 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32151 {
32152 tree ref;
32153 tree array_elt;
32154 tree field;
32155 tree final;
32156
32157 unsigned int field_val = 0;
32158 unsigned int NUM_ISA_NAMES
32159 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32160
32161 for (i = 0; i < NUM_ISA_NAMES; i++)
32162 if (strcmp (isa_names_table[i].name,
32163 TREE_STRING_POINTER (param_string_cst)) == 0)
32164 break;
32165
32166 if (i == NUM_ISA_NAMES)
32167 {
32168 error ("Parameter to builtin not valid: %s",
32169 TREE_STRING_POINTER (param_string_cst));
32170 return integer_zero_node;
32171 }
32172
32173 field = TYPE_FIELDS (__processor_model_type);
32174 /* Get the last field, which is __cpu_features. */
32175 while (DECL_CHAIN (field))
32176 field = DECL_CHAIN (field);
32177
32178 /* Get the appropriate field: __cpu_model.__cpu_features */
32179 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32180 field, NULL_TREE);
32181
32182 /* Access the 0th element of __cpu_features array. */
32183 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32184 integer_zero_node, NULL_TREE, NULL_TREE);
32185
32186 field_val = (1 << isa_names_table[i].feature);
32187 /* Return __cpu_model.__cpu_features[0] & field_val */
32188 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32189 build_int_cstu (unsigned_type_node, field_val));
32190 return build1 (CONVERT_EXPR, integer_type_node, final);
32191 }
32192 gcc_unreachable ();
32193 }
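
/* As an illustration (simplified): __builtin_cpu_supports ("avx2") is
   folded into

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   and __builtin_cpu_is ("amd") into

     (int) (__cpu_model.__cpu_vendor == M_AMD).  */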
32194
32195 static tree
32196 ix86_fold_builtin (tree fndecl, int n_args,
32197 tree *args, bool ignore ATTRIBUTE_UNUSED)
32198 {
32199 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32200 {
32201 enum ix86_builtins fn_code = (enum ix86_builtins)
32202 DECL_FUNCTION_CODE (fndecl);
32203 switch (fn_code)
32204 {
32205 case IX86_BUILTIN_CPU_IS:
32206 case IX86_BUILTIN_CPU_SUPPORTS:
32207 gcc_assert (n_args == 1);
32208 return fold_builtin_cpu (fndecl, args);
32209
32210 case IX86_BUILTIN_NANQ:
32211 case IX86_BUILTIN_NANSQ:
32212 {
32213 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32214 const char *str = c_getstr (*args);
32215 int quiet = fn_code == IX86_BUILTIN_NANQ;
32216 REAL_VALUE_TYPE real;
32217
32218 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
32219 return build_real (type, real);
32220 return NULL_TREE;
32221 }
32222
32223 case IX86_BUILTIN_INFQ:
32224 case IX86_BUILTIN_HUGE_VALQ:
32225 {
32226 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32227 REAL_VALUE_TYPE inf;
32228 real_inf (&inf);
32229 return build_real (type, inf);
32230 }
32231
32232 case IX86_BUILTIN_TZCNT16:
32233 case IX86_BUILTIN_CTZS:
32234 case IX86_BUILTIN_TZCNT32:
32235 case IX86_BUILTIN_TZCNT64:
32236 gcc_assert (n_args == 1);
32237 if (TREE_CODE (args[0]) == INTEGER_CST)
32238 {
32239 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32240 tree arg = args[0];
32241 if (fn_code == IX86_BUILTIN_TZCNT16
32242 || fn_code == IX86_BUILTIN_CTZS)
32243 arg = fold_convert (short_unsigned_type_node, arg);
32244 if (integer_zerop (arg))
32245 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
32246 else
32247 return fold_const_call (CFN_CTZ, type, arg);
32248 }
32249 break;
32250
32251 case IX86_BUILTIN_LZCNT16:
32252 case IX86_BUILTIN_CLZS:
32253 case IX86_BUILTIN_LZCNT32:
32254 case IX86_BUILTIN_LZCNT64:
32255 gcc_assert (n_args == 1);
32256 if (TREE_CODE (args[0]) == INTEGER_CST)
32257 {
32258 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32259 tree arg = args[0];
32260 if (fn_code == IX86_BUILTIN_LZCNT16
32261 || fn_code == IX86_BUILTIN_CLZS)
32262 arg = fold_convert (short_unsigned_type_node, arg);
32263 if (integer_zerop (arg))
32264 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
32265 else
32266 return fold_const_call (CFN_CLZ, type, arg);
32267 }
32268 break;
32269
32270 case IX86_BUILTIN_BEXTR32:
32271 case IX86_BUILTIN_BEXTR64:
32272 case IX86_BUILTIN_BEXTRI32:
32273 case IX86_BUILTIN_BEXTRI64:
32274 gcc_assert (n_args == 2);
32275 if (tree_fits_uhwi_p (args[1]))
32276 {
32277 unsigned HOST_WIDE_INT res = 0;
32278 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
32279 unsigned int start = tree_to_uhwi (args[1]);
32280 unsigned int len = (start & 0xff00) >> 8;
32281 start &= 0xff;
32282 if (start >= prec || len == 0)
32283 res = 0;
32284 else if (!tree_fits_uhwi_p (args[0]))
32285 break;
32286 else
32287 res = tree_to_uhwi (args[0]) >> start;
32288 if (len > prec)
32289 len = prec;
32290 if (len < HOST_BITS_PER_WIDE_INT)
32291 res &= (HOST_WIDE_INT_1U << len) - 1;
32292 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32293 }
32294 break;
32295
32296 case IX86_BUILTIN_BZHI32:
32297 case IX86_BUILTIN_BZHI64:
32298 gcc_assert (n_args == 2);
32299 if (tree_fits_uhwi_p (args[1]))
32300 {
32301 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
32302 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
32303 return args[0];
32304 if (!tree_fits_uhwi_p (args[0]))
32305 break;
32306 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
32307 res &= ~(HOST_WIDE_INT_M1U << idx);
32308 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32309 }
32310 break;
32311
32312 case IX86_BUILTIN_PDEP32:
32313 case IX86_BUILTIN_PDEP64:
32314 gcc_assert (n_args == 2);
32315 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
32316 {
32317 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
32318 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
32319 unsigned HOST_WIDE_INT res = 0;
32320 unsigned HOST_WIDE_INT m, k = 1;
32321 for (m = 1; m; m <<= 1)
32322 if ((mask & m) != 0)
32323 {
32324 if ((src & k) != 0)
32325 res |= m;
32326 k <<= 1;
32327 }
32328 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32329 }
32330 break;
32331
32332 case IX86_BUILTIN_PEXT32:
32333 case IX86_BUILTIN_PEXT64:
32334 gcc_assert (n_args == 2);
32335 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
32336 {
32337 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
32338 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
32339 unsigned HOST_WIDE_INT res = 0;
32340 unsigned HOST_WIDE_INT m, k = 1;
32341 for (m = 1; m; m <<= 1)
32342 if ((mask & m) != 0)
32343 {
32344 if ((src & m) != 0)
32345 res |= k;
32346 k <<= 1;
32347 }
32348 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32349 }
32350 break;
32351
32352 default:
32353 break;
32354 }
32355 }
32356
32357 #ifdef SUBTARGET_FOLD_BUILTIN
32358 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32359 #endif
32360
32361 return NULL_TREE;
32362 }
32363
32364 /* Fold a MD builtin (use ix86_fold_builtin for folding into
32365 constant) in GIMPLE. */
32366
32367 bool
32368 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
32369 {
32370 gimple *stmt = gsi_stmt (*gsi);
32371 tree fndecl = gimple_call_fndecl (stmt);
32372 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
32373 int n_args = gimple_call_num_args (stmt);
32374 enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
32375 tree decl = NULL_TREE;
32376 tree arg0, arg1;
32377
32378 switch (fn_code)
32379 {
32380 case IX86_BUILTIN_TZCNT32:
32381 decl = builtin_decl_implicit (BUILT_IN_CTZ);
32382 goto fold_tzcnt_lzcnt;
32383
32384 case IX86_BUILTIN_TZCNT64:
32385 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
32386 goto fold_tzcnt_lzcnt;
32387
32388 case IX86_BUILTIN_LZCNT32:
32389 decl = builtin_decl_implicit (BUILT_IN_CLZ);
32390 goto fold_tzcnt_lzcnt;
32391
32392 case IX86_BUILTIN_LZCNT64:
32393 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
32394 goto fold_tzcnt_lzcnt;
32395
32396 fold_tzcnt_lzcnt:
32397 gcc_assert (n_args == 1);
32398 arg0 = gimple_call_arg (stmt, 0);
32399 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
32400 {
32401 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
32402 /* If arg0 is provably non-zero, optimize into generic
32403 __builtin_c[tl]z{,ll} function the middle-end handles
32404 better. */
32405 if (!expr_not_equal_to (arg0, wi::zero (prec)))
32406 return false;
32407
32408 location_t loc = gimple_location (stmt);
32409 gimple *g = gimple_build_call (decl, 1, arg0);
32410 gimple_set_location (g, loc);
32411 tree lhs = make_ssa_name (integer_type_node);
32412 gimple_call_set_lhs (g, lhs);
32413 gsi_insert_before (gsi, g, GSI_SAME_STMT);
32414 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
32415 gimple_set_location (g, loc);
32416 gsi_replace (gsi, g, false);
32417 return true;
32418 }
32419 break;
32420
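      /* bzhi zeroes all bits of the first operand from the bit index given
	 by the low byte of the second operand upwards.  If that index is at
	 least the operand's precision, nothing is zeroed, so the call can
	 be replaced by a plain copy of the first argument.  */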
32421 case IX86_BUILTIN_BZHI32:
32422 case IX86_BUILTIN_BZHI64:
32423 gcc_assert (n_args == 2);
32424 arg1 = gimple_call_arg (stmt, 1);
32425 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
32426 {
32427 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
32428 arg0 = gimple_call_arg (stmt, 0);
32429 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
32430 break;
32431 location_t loc = gimple_location (stmt);
32432 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
32433 gimple_set_location (g, loc);
32434 gsi_replace (gsi, g, false);
32435 return true;
32436 }
32437 break;
32438
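      /* With an all-ones mask both pdep and pext are identity operations,
	 so the call can be replaced by a copy of the first argument.  */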
32439 case IX86_BUILTIN_PDEP32:
32440 case IX86_BUILTIN_PDEP64:
32441 case IX86_BUILTIN_PEXT32:
32442 case IX86_BUILTIN_PEXT64:
32443 gcc_assert (n_args == 2);
32444 arg1 = gimple_call_arg (stmt, 1);
32445 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
32446 {
32447 location_t loc = gimple_location (stmt);
32448 arg0 = gimple_call_arg (stmt, 0);
32449 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
32450 gimple_set_location (g, loc);
32451 gsi_replace (gsi, g, false);
32452 return true;
32453 }
32454 break;
32455
32456 default:
32457 break;
32458 }
32459
32460 return false;
32461 }
32462
32463 /* Make builtins to detect cpu type and features supported. NAME is
32464 the builtin name, CODE is the builtin code, FTYPE is its function
32465 type, and IS_CONST says whether the builtin is TREE_READONLY. */
32466
32467 static void
32468 make_cpu_type_builtin (const char* name, int code,
32469 enum ix86_builtin_func_type ftype, bool is_const)
32470 {
32471 tree decl;
32472 tree type;
32473
32474 type = ix86_get_builtin_func_type (ftype);
32475 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32476 NULL, NULL_TREE);
32477 gcc_assert (decl != NULL_TREE);
32478 ix86_builtins[(int) code] = decl;
32479 TREE_READONLY (decl) = is_const;
32480 }
32481
32482 /* Make builtins to get CPU type and features supported. The created
32483 builtins are:
32484
32485 __builtin_cpu_init (), to detect cpu type and features,
32486 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32487 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32488 */
32489
32490 static void
32491 ix86_init_platform_type_builtins (void)
32492 {
32493 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32494 INT_FTYPE_VOID, false);
32495 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32496 INT_FTYPE_PCCHAR, true);
32497 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32498 INT_FTYPE_PCCHAR, true);
32499 }
32500
32501 /* Internal method for ix86_init_builtins. */
32502
32503 static void
32504 ix86_init_builtins_va_builtins_abi (void)
32505 {
32506 tree ms_va_ref, sysv_va_ref;
32507 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32508 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32509 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32510 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32511
32512 if (!TARGET_64BIT)
32513 return;
32514 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32515 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32516 ms_va_ref = build_reference_type (ms_va_list_type_node);
32517 sysv_va_ref =
32518 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32519
32520 fnvoid_va_end_ms =
32521 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32522 fnvoid_va_start_ms =
32523 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32524 fnvoid_va_end_sysv =
32525 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32526 fnvoid_va_start_sysv =
32527 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32528 NULL_TREE);
32529 fnvoid_va_copy_ms =
32530 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32531 NULL_TREE);
32532 fnvoid_va_copy_sysv =
32533 build_function_type_list (void_type_node, sysv_va_ref,
32534 sysv_va_ref, NULL_TREE);
32535
32536 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32537 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32538 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32539 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32540 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32541 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32542 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32543 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32544 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32545 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32546 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32547 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32548 }
32549
32550 static void
32551 ix86_init_builtin_types (void)
32552 {
32553 tree float80_type_node, const_string_type_node;
32554
32555 /* The __float80 type. */
32556 float80_type_node = long_double_type_node;
32557 if (TYPE_MODE (float80_type_node) != XFmode)
32558 {
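      /* long double is not the 80-bit type here (e.g. with
	 -mlong-double-64 or -mlong-double-128): reuse _Float64x when it
	 has XFmode, otherwise lay out a separate 80-bit REAL_TYPE.  */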
32559 if (float64x_type_node != NULL_TREE
32560 && TYPE_MODE (float64x_type_node) == XFmode)
32561 float80_type_node = float64x_type_node;
32562 else
32563 {
32564 /* The __float80 type. */
32565 float80_type_node = make_node (REAL_TYPE);
32566
32567 TYPE_PRECISION (float80_type_node) = 80;
32568 layout_type (float80_type_node);
32569 }
32570 }
32571 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32572
32573 /* The __float128 type. The node has already been created as
32574 _Float128, so we only need to register the __float128 name for
32575 it. */
32576 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32577
32578 const_string_type_node
32579 = build_pointer_type (build_qualified_type
32580 (char_type_node, TYPE_QUAL_CONST));
32581
32582 /* This macro is built by i386-builtin-types.awk. */
32583 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32584 }
32585
32586 static void
32587 ix86_init_builtins (void)
32588 {
32589 tree ftype, decl;
32590
32591 ix86_init_builtin_types ();
32592
32593 /* Builtins to get CPU type and features. */
32594 ix86_init_platform_type_builtins ();
32595
32596 /* TFmode support builtins. */
32597 def_builtin_const (0, "__builtin_infq",
32598 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32599 def_builtin_const (0, "__builtin_huge_valq",
32600 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32601
32602 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
32603 decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
32604 BUILT_IN_MD, "nanq", NULL_TREE);
32605 TREE_READONLY (decl) = 1;
32606 ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
32607
32608 decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
32609 BUILT_IN_MD, "nansq", NULL_TREE);
32610 TREE_READONLY (decl) = 1;
32611 ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
32612
32613 /* We will expand them to a normal call if SSE isn't available, since
32614 they are used by libgcc. */
32615 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32616 decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
32617 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32618 TREE_READONLY (decl) = 1;
32619 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
32620
32621 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32622 decl = add_builtin_function ("__builtin_copysignq", ftype,
32623 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
32624 "__copysigntf3", NULL_TREE);
32625 TREE_READONLY (decl) = 1;
32626 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
32627
32628 ix86_init_tm_builtins ();
32629 ix86_init_mmx_sse_builtins ();
32630 ix86_init_mpx_builtins ();
32631
32632 if (TARGET_LP64)
32633 ix86_init_builtins_va_builtins_abi ();
32634
32635 #ifdef SUBTARGET_INIT_BUILTINS
32636 SUBTARGET_INIT_BUILTINS;
32637 #endif
32638 }
32639
32640 /* Return the ix86 builtin for CODE. */
32641
32642 static tree
32643 ix86_builtin_decl (unsigned code, bool)
32644 {
32645 if (code >= IX86_BUILTIN_MAX)
32646 return error_mark_node;
32647
32648 return ix86_builtins[code];
32649 }
32650
32651 /* Errors in the source file can cause expand_expr to return const0_rtx
32652 where we expect a vector. To avoid crashing, use one of the vector
32653 clear instructions. */
32654 static rtx
32655 safe_vector_operand (rtx x, machine_mode mode)
32656 {
32657 if (x == const0_rtx)
32658 x = CONST0_RTX (mode);
32659 return x;
32660 }
32661
32662 /* Fix up modeless constants to fit the required mode. */
32663 static rtx
32664 fixup_modeless_constant (rtx x, machine_mode mode)
32665 {
32666 if (GET_MODE (x) == VOIDmode)
32667 x = convert_to_mode (mode, x, 1);
32668 return x;
32669 }
32670
32671 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32672
32673 static rtx
32674 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32675 {
32676 rtx pat;
32677 tree arg0 = CALL_EXPR_ARG (exp, 0);
32678 tree arg1 = CALL_EXPR_ARG (exp, 1);
32679 rtx op0 = expand_normal (arg0);
32680 rtx op1 = expand_normal (arg1);
32681 machine_mode tmode = insn_data[icode].operand[0].mode;
32682 machine_mode mode0 = insn_data[icode].operand[1].mode;
32683 machine_mode mode1 = insn_data[icode].operand[2].mode;
32684
32685 if (VECTOR_MODE_P (mode0))
32686 op0 = safe_vector_operand (op0, mode0);
32687 if (VECTOR_MODE_P (mode1))
32688 op1 = safe_vector_operand (op1, mode1);
32689
32690 if (optimize || !target
32691 || GET_MODE (target) != tmode
32692 || !insn_data[icode].operand[0].predicate (target, tmode))
32693 target = gen_reg_rtx (tmode);
32694
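  /* An SImode operand supplied for a TImode insn operand is first loaded
     into the low element of a V4SImode register (clearing the upper
     elements) and then reinterpreted as TImode, i.e. zero-extended into
     the full 128-bit register.  */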
32695 if (GET_MODE (op1) == SImode && mode1 == TImode)
32696 {
32697 rtx x = gen_reg_rtx (V4SImode);
32698 emit_insn (gen_sse2_loadd (x, op1));
32699 op1 = gen_lowpart (TImode, x);
32700 }
32701
32702 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32703 op0 = copy_to_mode_reg (mode0, op0);
32704 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32705 op1 = copy_to_mode_reg (mode1, op1);
32706
32707 pat = GEN_FCN (icode) (target, op0, op1);
32708 if (! pat)
32709 return 0;
32710
32711 emit_insn (pat);
32712
32713 return target;
32714 }
32715
32716 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32717
32718 static rtx
32719 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32720 enum ix86_builtin_func_type m_type,
32721 enum rtx_code sub_code)
32722 {
32723 rtx pat;
32724 int i;
32725 int nargs;
32726 bool comparison_p = false;
32727 bool tf_p = false;
32728 bool last_arg_constant = false;
32729 int num_memory = 0;
32730 struct {
32731 rtx op;
32732 machine_mode mode;
32733 } args[4];
32734
32735 machine_mode tmode = insn_data[icode].operand[0].mode;
32736
32737 switch (m_type)
32738 {
32739 case MULTI_ARG_4_DF2_DI_I:
32740 case MULTI_ARG_4_DF2_DI_I1:
32741 case MULTI_ARG_4_SF2_SI_I:
32742 case MULTI_ARG_4_SF2_SI_I1:
32743 nargs = 4;
32744 last_arg_constant = true;
32745 break;
32746
32747 case MULTI_ARG_3_SF:
32748 case MULTI_ARG_3_DF:
32749 case MULTI_ARG_3_SF2:
32750 case MULTI_ARG_3_DF2:
32751 case MULTI_ARG_3_DI:
32752 case MULTI_ARG_3_SI:
32753 case MULTI_ARG_3_SI_DI:
32754 case MULTI_ARG_3_HI:
32755 case MULTI_ARG_3_HI_SI:
32756 case MULTI_ARG_3_QI:
32757 case MULTI_ARG_3_DI2:
32758 case MULTI_ARG_3_SI2:
32759 case MULTI_ARG_3_HI2:
32760 case MULTI_ARG_3_QI2:
32761 nargs = 3;
32762 break;
32763
32764 case MULTI_ARG_2_SF:
32765 case MULTI_ARG_2_DF:
32766 case MULTI_ARG_2_DI:
32767 case MULTI_ARG_2_SI:
32768 case MULTI_ARG_2_HI:
32769 case MULTI_ARG_2_QI:
32770 nargs = 2;
32771 break;
32772
32773 case MULTI_ARG_2_DI_IMM:
32774 case MULTI_ARG_2_SI_IMM:
32775 case MULTI_ARG_2_HI_IMM:
32776 case MULTI_ARG_2_QI_IMM:
32777 nargs = 2;
32778 last_arg_constant = true;
32779 break;
32780
32781 case MULTI_ARG_1_SF:
32782 case MULTI_ARG_1_DF:
32783 case MULTI_ARG_1_SF2:
32784 case MULTI_ARG_1_DF2:
32785 case MULTI_ARG_1_DI:
32786 case MULTI_ARG_1_SI:
32787 case MULTI_ARG_1_HI:
32788 case MULTI_ARG_1_QI:
32789 case MULTI_ARG_1_SI_DI:
32790 case MULTI_ARG_1_HI_DI:
32791 case MULTI_ARG_1_HI_SI:
32792 case MULTI_ARG_1_QI_DI:
32793 case MULTI_ARG_1_QI_SI:
32794 case MULTI_ARG_1_QI_HI:
32795 nargs = 1;
32796 break;
32797
32798 case MULTI_ARG_2_DI_CMP:
32799 case MULTI_ARG_2_SI_CMP:
32800 case MULTI_ARG_2_HI_CMP:
32801 case MULTI_ARG_2_QI_CMP:
32802 nargs = 2;
32803 comparison_p = true;
32804 break;
32805
32806 case MULTI_ARG_2_SF_TF:
32807 case MULTI_ARG_2_DF_TF:
32808 case MULTI_ARG_2_DI_TF:
32809 case MULTI_ARG_2_SI_TF:
32810 case MULTI_ARG_2_HI_TF:
32811 case MULTI_ARG_2_QI_TF:
32812 nargs = 2;
32813 tf_p = true;
32814 break;
32815
32816 default:
32817 gcc_unreachable ();
32818 }
32819
32820 if (optimize || !target
32821 || GET_MODE (target) != tmode
32822 || !insn_data[icode].operand[0].predicate (target, tmode))
32823 target = gen_reg_rtx (tmode);
32824 else if (memory_operand (target, tmode))
32825 num_memory++;
32826
32827 gcc_assert (nargs <= 4);
32828
32829 for (i = 0; i < nargs; i++)
32830 {
32831 tree arg = CALL_EXPR_ARG (exp, i);
32832 rtx op = expand_normal (arg);
32833 int adjust = (comparison_p) ? 1 : 0;
32834 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32835
32836 if (last_arg_constant && i == nargs - 1)
32837 {
32838 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32839 {
32840 enum insn_code new_icode = icode;
32841 switch (icode)
32842 {
32843 case CODE_FOR_xop_vpermil2v2df3:
32844 case CODE_FOR_xop_vpermil2v4sf3:
32845 case CODE_FOR_xop_vpermil2v4df3:
32846 case CODE_FOR_xop_vpermil2v8sf3:
32847 error ("the last argument must be a 2-bit immediate");
32848 return gen_reg_rtx (tmode);
32849 case CODE_FOR_xop_rotlv2di3:
32850 new_icode = CODE_FOR_rotlv2di3;
32851 goto xop_rotl;
32852 case CODE_FOR_xop_rotlv4si3:
32853 new_icode = CODE_FOR_rotlv4si3;
32854 goto xop_rotl;
32855 case CODE_FOR_xop_rotlv8hi3:
32856 new_icode = CODE_FOR_rotlv8hi3;
32857 goto xop_rotl;
32858 case CODE_FOR_xop_rotlv16qi3:
32859 new_icode = CODE_FOR_rotlv16qi3;
32860 xop_rotl:
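		  /* Rotating by COUNT is equivalent to rotating by COUNT
		     modulo the element width, so a constant count is
		     simply masked down to the valid immediate range; a
		     variable count falls back to the generic rotate
		     pattern chosen above.  */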
32861 if (CONST_INT_P (op))
32862 {
32863 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
32864 op = GEN_INT (INTVAL (op) & mask);
32865 gcc_checking_assert
32866 (insn_data[icode].operand[i + 1].predicate (op, mode));
32867 }
32868 else
32869 {
32870 gcc_checking_assert
32871 (nargs == 2
32872 && insn_data[new_icode].operand[0].mode == tmode
32873 && insn_data[new_icode].operand[1].mode == tmode
32874 && insn_data[new_icode].operand[2].mode == mode
32875 && insn_data[new_icode].operand[0].predicate
32876 == insn_data[icode].operand[0].predicate
32877 && insn_data[new_icode].operand[1].predicate
32878 == insn_data[icode].operand[1].predicate);
32879 icode = new_icode;
32880 goto non_constant;
32881 }
32882 break;
32883 default:
32884 gcc_unreachable ();
32885 }
32886 }
32887 }
32888 else
32889 {
32890 non_constant:
32891 if (VECTOR_MODE_P (mode))
32892 op = safe_vector_operand (op, mode);
32893
32894 /* If we aren't optimizing, only allow one memory operand to be
32895 generated. */
32896 if (memory_operand (op, mode))
32897 num_memory++;
32898
32899 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32900
32901 if (optimize
32902 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32903 || num_memory > 1)
32904 op = force_reg (mode, op);
32905 }
32906
32907 args[i].op = op;
32908 args[i].mode = mode;
32909 }
32910
32911 switch (nargs)
32912 {
32913 case 1:
32914 pat = GEN_FCN (icode) (target, args[0].op);
32915 break;
32916
32917 case 2:
32918 if (tf_p)
32919 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32920 GEN_INT ((int)sub_code));
32921 else if (! comparison_p)
32922 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32923 else
32924 {
32925 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32926 args[0].op,
32927 args[1].op);
32928
32929 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32930 }
32931 break;
32932
32933 case 3:
32934 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32935 break;
32936
32937 case 4:
32938 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32939 break;
32940
32941 default:
32942 gcc_unreachable ();
32943 }
32944
32945 if (! pat)
32946 return 0;
32947
32948 emit_insn (pat);
32949 return target;
32950 }
32951
32952 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32953 insns with vec_merge. */
32954
32955 static rtx
32956 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32957 rtx target)
32958 {
32959 rtx pat;
32960 tree arg0 = CALL_EXPR_ARG (exp, 0);
32961 rtx op1, op0 = expand_normal (arg0);
32962 machine_mode tmode = insn_data[icode].operand[0].mode;
32963 machine_mode mode0 = insn_data[icode].operand[1].mode;
32964
32965 if (optimize || !target
32966 || GET_MODE (target) != tmode
32967 || !insn_data[icode].operand[0].predicate (target, tmode))
32968 target = gen_reg_rtx (tmode);
32969
32970 if (VECTOR_MODE_P (mode0))
32971 op0 = safe_vector_operand (op0, mode0);
32972
32973 if ((optimize && !register_operand (op0, mode0))
32974 || !insn_data[icode].operand[1].predicate (op0, mode0))
32975 op0 = copy_to_mode_reg (mode0, op0);
32976
32977 op1 = op0;
32978 if (!insn_data[icode].operand[2].predicate (op1, mode0))
32979 op1 = copy_to_mode_reg (mode0, op1);
32980
32981 pat = GEN_FCN (icode) (target, op0, op1);
32982 if (! pat)
32983 return 0;
32984 emit_insn (pat);
32985 return target;
32986 }
32987
32988 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
32989
32990 static rtx
32991 ix86_expand_sse_compare (const struct builtin_description *d,
32992 tree exp, rtx target, bool swap)
32993 {
32994 rtx pat;
32995 tree arg0 = CALL_EXPR_ARG (exp, 0);
32996 tree arg1 = CALL_EXPR_ARG (exp, 1);
32997 rtx op0 = expand_normal (arg0);
32998 rtx op1 = expand_normal (arg1);
32999 rtx op2;
33000 machine_mode tmode = insn_data[d->icode].operand[0].mode;
33001 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33002 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33003 enum rtx_code comparison = d->comparison;
33004
33005 if (VECTOR_MODE_P (mode0))
33006 op0 = safe_vector_operand (op0, mode0);
33007 if (VECTOR_MODE_P (mode1))
33008 op1 = safe_vector_operand (op1, mode1);
33009
33010 /* Swap operands if we have a comparison that isn't available in
33011 hardware. */
33012 if (swap)
33013 std::swap (op0, op1);
33014
33015 if (optimize || !target
33016 || GET_MODE (target) != tmode
33017 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33018 target = gen_reg_rtx (tmode);
33019
33020 if ((optimize && !register_operand (op0, mode0))
33021 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33022 op0 = copy_to_mode_reg (mode0, op0);
33023 if ((optimize && !register_operand (op1, mode1))
33024 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33025 op1 = copy_to_mode_reg (mode1, op1);
33026
33027 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33028 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33029 if (! pat)
33030 return 0;
33031 emit_insn (pat);
33032 return target;
33033 }
33034
33035 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33036
33037 static rtx
33038 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33039 rtx target)
33040 {
33041 rtx pat;
33042 tree arg0 = CALL_EXPR_ARG (exp, 0);
33043 tree arg1 = CALL_EXPR_ARG (exp, 1);
33044 rtx op0 = expand_normal (arg0);
33045 rtx op1 = expand_normal (arg1);
33046 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33047 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33048 enum rtx_code comparison = d->comparison;
33049
33050 if (VECTOR_MODE_P (mode0))
33051 op0 = safe_vector_operand (op0, mode0);
33052 if (VECTOR_MODE_P (mode1))
33053 op1 = safe_vector_operand (op1, mode1);
33054
33055 /* Swap operands if we have a comparison that isn't available in
33056 hardware. */
33057 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33058 std::swap (op0, op1);
33059
33060 target = gen_reg_rtx (SImode);
33061 emit_move_insn (target, const0_rtx);
33062 target = gen_rtx_SUBREG (QImode, target, 0);
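  /* TARGET is a QImode subreg of a freshly zeroed SImode pseudo; only its
     low byte is written below via a strict_low_part set, so the containing
     SImode register holds the 0/1 result of the comparison.  */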
33063
33064 if ((optimize && !register_operand (op0, mode0))
33065 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33066 op0 = copy_to_mode_reg (mode0, op0);
33067 if ((optimize && !register_operand (op1, mode1))
33068 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33069 op1 = copy_to_mode_reg (mode1, op1);
33070
33071 pat = GEN_FCN (d->icode) (op0, op1);
33072 if (! pat)
33073 return 0;
33074 emit_insn (pat);
33075 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33076 gen_rtx_fmt_ee (comparison, QImode,
33077 SET_DEST (pat),
33078 const0_rtx)));
33079
33080 return SUBREG_REG (target);
33081 }
33082
33083 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33084
33085 static rtx
33086 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33087 rtx target)
33088 {
33089 rtx pat;
33090 tree arg0 = CALL_EXPR_ARG (exp, 0);
33091 rtx op1, op0 = expand_normal (arg0);
33092 machine_mode tmode = insn_data[d->icode].operand[0].mode;
33093 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33094
33095 if (optimize || target == 0
33096 || GET_MODE (target) != tmode
33097 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33098 target = gen_reg_rtx (tmode);
33099
33100 if (VECTOR_MODE_P (mode0))
33101 op0 = safe_vector_operand (op0, mode0);
33102
33103 if ((optimize && !register_operand (op0, mode0))
33104 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33105 op0 = copy_to_mode_reg (mode0, op0);
33106
33107 op1 = GEN_INT (d->comparison);
33108
33109 pat = GEN_FCN (d->icode) (target, op0, op1);
33110 if (! pat)
33111 return 0;
33112 emit_insn (pat);
33113 return target;
33114 }
33115
33116 static rtx
33117 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33118 tree exp, rtx target)
33119 {
33120 rtx pat;
33121 tree arg0 = CALL_EXPR_ARG (exp, 0);
33122 tree arg1 = CALL_EXPR_ARG (exp, 1);
33123 rtx op0 = expand_normal (arg0);
33124 rtx op1 = expand_normal (arg1);
33125 rtx op2;
33126 machine_mode tmode = insn_data[d->icode].operand[0].mode;
33127 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33128 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33129
33130 if (optimize || target == 0
33131 || GET_MODE (target) != tmode
33132 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33133 target = gen_reg_rtx (tmode);
33134
33135 op0 = safe_vector_operand (op0, mode0);
33136 op1 = safe_vector_operand (op1, mode1);
33137
33138 if ((optimize && !register_operand (op0, mode0))
33139 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33140 op0 = copy_to_mode_reg (mode0, op0);
33141 if ((optimize && !register_operand (op1, mode1))
33142 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33143 op1 = copy_to_mode_reg (mode1, op1);
33144
33145 op2 = GEN_INT (d->comparison);
33146
33147 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33148 if (! pat)
33149 return 0;
33150 emit_insn (pat);
33151 return target;
33152 }
33153
33154 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33155
33156 static rtx
33157 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33158 rtx target)
33159 {
33160 rtx pat;
33161 tree arg0 = CALL_EXPR_ARG (exp, 0);
33162 tree arg1 = CALL_EXPR_ARG (exp, 1);
33163 rtx op0 = expand_normal (arg0);
33164 rtx op1 = expand_normal (arg1);
33165 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33166 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33167 enum rtx_code comparison = d->comparison;
33168
33169 if (VECTOR_MODE_P (mode0))
33170 op0 = safe_vector_operand (op0, mode0);
33171 if (VECTOR_MODE_P (mode1))
33172 op1 = safe_vector_operand (op1, mode1);
33173
33174 target = gen_reg_rtx (SImode);
33175 emit_move_insn (target, const0_rtx);
33176 target = gen_rtx_SUBREG (QImode, target, 0);
33177
33178 if ((optimize && !register_operand (op0, mode0))
33179 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33180 op0 = copy_to_mode_reg (mode0, op0);
33181 if ((optimize && !register_operand (op1, mode1))
33182 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33183 op1 = copy_to_mode_reg (mode1, op1);
33184
33185 pat = GEN_FCN (d->icode) (op0, op1);
33186 if (! pat)
33187 return 0;
33188 emit_insn (pat);
33189 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33190 gen_rtx_fmt_ee (comparison, QImode,
33191 SET_DEST (pat),
33192 const0_rtx)));
33193
33194 return SUBREG_REG (target);
33195 }
33196
33197 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33198
33199 static rtx
33200 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33201 tree exp, rtx target)
33202 {
33203 rtx pat;
33204 tree arg0 = CALL_EXPR_ARG (exp, 0);
33205 tree arg1 = CALL_EXPR_ARG (exp, 1);
33206 tree arg2 = CALL_EXPR_ARG (exp, 2);
33207 tree arg3 = CALL_EXPR_ARG (exp, 3);
33208 tree arg4 = CALL_EXPR_ARG (exp, 4);
33209 rtx scratch0, scratch1;
33210 rtx op0 = expand_normal (arg0);
33211 rtx op1 = expand_normal (arg1);
33212 rtx op2 = expand_normal (arg2);
33213 rtx op3 = expand_normal (arg3);
33214 rtx op4 = expand_normal (arg4);
33215 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33216
33217 tmode0 = insn_data[d->icode].operand[0].mode;
33218 tmode1 = insn_data[d->icode].operand[1].mode;
33219 modev2 = insn_data[d->icode].operand[2].mode;
33220 modei3 = insn_data[d->icode].operand[3].mode;
33221 modev4 = insn_data[d->icode].operand[4].mode;
33222 modei5 = insn_data[d->icode].operand[5].mode;
33223 modeimm = insn_data[d->icode].operand[6].mode;
33224
33225 if (VECTOR_MODE_P (modev2))
33226 op0 = safe_vector_operand (op0, modev2);
33227 if (VECTOR_MODE_P (modev4))
33228 op2 = safe_vector_operand (op2, modev4);
33229
33230 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33231 op0 = copy_to_mode_reg (modev2, op0);
33232 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33233 op1 = copy_to_mode_reg (modei3, op1);
33234 if ((optimize && !register_operand (op2, modev4))
33235 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33236 op2 = copy_to_mode_reg (modev4, op2);
33237 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33238 op3 = copy_to_mode_reg (modei5, op3);
33239
33240 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33241 {
33242 error ("the fifth argument must be an 8-bit immediate");
33243 return const0_rtx;
33244 }
33245
33246 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33247 {
33248 if (optimize || !target
33249 || GET_MODE (target) != tmode0
33250 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33251 target = gen_reg_rtx (tmode0);
33252
33253 scratch1 = gen_reg_rtx (tmode1);
33254
33255 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33256 }
33257 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33258 {
33259 if (optimize || !target
33260 || GET_MODE (target) != tmode1
33261 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33262 target = gen_reg_rtx (tmode1);
33263
33264 scratch0 = gen_reg_rtx (tmode0);
33265
33266 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33267 }
33268 else
33269 {
33270 gcc_assert (d->flag);
33271
33272 scratch0 = gen_reg_rtx (tmode0);
33273 scratch1 = gen_reg_rtx (tmode1);
33274
33275 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33276 }
33277
33278 if (! pat)
33279 return 0;
33280
33281 emit_insn (pat);
33282
33283 if (d->flag)
33284 {
33285 target = gen_reg_rtx (SImode);
33286 emit_move_insn (target, const0_rtx);
33287 target = gen_rtx_SUBREG (QImode, target, 0);
33288
33289 emit_insn
33290 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33291 gen_rtx_fmt_ee (EQ, QImode,
33292 gen_rtx_REG ((machine_mode) d->flag,
33293 FLAGS_REG),
33294 const0_rtx)));
33295 return SUBREG_REG (target);
33296 }
33297 else
33298 return target;
33299 }
33300
33301
33302 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33303
33304 static rtx
33305 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33306 tree exp, rtx target)
33307 {
33308 rtx pat;
33309 tree arg0 = CALL_EXPR_ARG (exp, 0);
33310 tree arg1 = CALL_EXPR_ARG (exp, 1);
33311 tree arg2 = CALL_EXPR_ARG (exp, 2);
33312 rtx scratch0, scratch1;
33313 rtx op0 = expand_normal (arg0);
33314 rtx op1 = expand_normal (arg1);
33315 rtx op2 = expand_normal (arg2);
33316 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33317
33318 tmode0 = insn_data[d->icode].operand[0].mode;
33319 tmode1 = insn_data[d->icode].operand[1].mode;
33320 modev2 = insn_data[d->icode].operand[2].mode;
33321 modev3 = insn_data[d->icode].operand[3].mode;
33322 modeimm = insn_data[d->icode].operand[4].mode;
33323
33324 if (VECTOR_MODE_P (modev2))
33325 op0 = safe_vector_operand (op0, modev2);
33326 if (VECTOR_MODE_P (modev3))
33327 op1 = safe_vector_operand (op1, modev3);
33328
33329 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33330 op0 = copy_to_mode_reg (modev2, op0);
33331 if ((optimize && !register_operand (op1, modev3))
33332 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33333 op1 = copy_to_mode_reg (modev3, op1);
33334
33335 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33336 {
33337 error ("the third argument must be an 8-bit immediate");
33338 return const0_rtx;
33339 }
33340
33341 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33342 {
33343 if (optimize || !target
33344 || GET_MODE (target) != tmode0
33345 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33346 target = gen_reg_rtx (tmode0);
33347
33348 scratch1 = gen_reg_rtx (tmode1);
33349
33350 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33351 }
33352 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33353 {
33354 if (optimize || !target
33355 || GET_MODE (target) != tmode1
33356 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33357 target = gen_reg_rtx (tmode1);
33358
33359 scratch0 = gen_reg_rtx (tmode0);
33360
33361 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33362 }
33363 else
33364 {
33365 gcc_assert (d->flag);
33366
33367 scratch0 = gen_reg_rtx (tmode0);
33368 scratch1 = gen_reg_rtx (tmode1);
33369
33370 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33371 }
33372
33373 if (! pat)
33374 return 0;
33375
33376 emit_insn (pat);
33377
33378 if (d->flag)
33379 {
33380 target = gen_reg_rtx (SImode);
33381 emit_move_insn (target, const0_rtx);
33382 target = gen_rtx_SUBREG (QImode, target, 0);
33383
33384 emit_insn
33385 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33386 gen_rtx_fmt_ee (EQ, QImode,
33387 gen_rtx_REG ((machine_mode) d->flag,
33388 FLAGS_REG),
33389 const0_rtx)));
33390 return SUBREG_REG (target);
33391 }
33392 else
33393 return target;
33394 }
33395
33396 /* Subroutine of ix86_expand_builtin to take care of insns with a
33397 variable number of operands. */
33398
33399 static rtx
33400 ix86_expand_args_builtin (const struct builtin_description *d,
33401 tree exp, rtx target)
33402 {
33403 rtx pat, real_target;
33404 unsigned int i, nargs;
33405 unsigned int nargs_constant = 0;
33406 unsigned int mask_pos = 0;
33407 int num_memory = 0;
33408 struct
33409 {
33410 rtx op;
33411 machine_mode mode;
33412 } args[6];
33413 bool second_arg_count = false;
33414 enum insn_code icode = d->icode;
33415 const struct insn_data_d *insn_p = &insn_data[icode];
33416 machine_mode tmode = insn_p->operand[0].mode;
33417 machine_mode rmode = VOIDmode;
33418 bool swap = false;
33419 enum rtx_code comparison = d->comparison;
33420
33421 switch ((enum ix86_builtin_func_type) d->flag)
33422 {
33423 case V2DF_FTYPE_V2DF_ROUND:
33424 case V4DF_FTYPE_V4DF_ROUND:
33425 case V8DF_FTYPE_V8DF_ROUND:
33426 case V4SF_FTYPE_V4SF_ROUND:
33427 case V8SF_FTYPE_V8SF_ROUND:
33428 case V16SF_FTYPE_V16SF_ROUND:
33429 case V4SI_FTYPE_V4SF_ROUND:
33430 case V8SI_FTYPE_V8SF_ROUND:
33431 case V16SI_FTYPE_V16SF_ROUND:
33432 return ix86_expand_sse_round (d, exp, target);
33433 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33434 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33435 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33436 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33437 case INT_FTYPE_V8SF_V8SF_PTEST:
33438 case INT_FTYPE_V4DI_V4DI_PTEST:
33439 case INT_FTYPE_V4DF_V4DF_PTEST:
33440 case INT_FTYPE_V4SF_V4SF_PTEST:
33441 case INT_FTYPE_V2DI_V2DI_PTEST:
33442 case INT_FTYPE_V2DF_V2DF_PTEST:
33443 return ix86_expand_sse_ptest (d, exp, target);
33444 case FLOAT128_FTYPE_FLOAT128:
33445 case FLOAT_FTYPE_FLOAT:
33446 case INT_FTYPE_INT:
33447 case UINT_FTYPE_UINT:
33448 case UINT16_FTYPE_UINT16:
33449 case UINT64_FTYPE_INT:
33450 case UINT64_FTYPE_UINT64:
33451 case INT64_FTYPE_INT64:
33452 case INT64_FTYPE_V4SF:
33453 case INT64_FTYPE_V2DF:
33454 case INT_FTYPE_V16QI:
33455 case INT_FTYPE_V8QI:
33456 case INT_FTYPE_V8SF:
33457 case INT_FTYPE_V4DF:
33458 case INT_FTYPE_V4SF:
33459 case INT_FTYPE_V2DF:
33460 case INT_FTYPE_V32QI:
33461 case V16QI_FTYPE_V16QI:
33462 case V8SI_FTYPE_V8SF:
33463 case V8SI_FTYPE_V4SI:
33464 case V8HI_FTYPE_V8HI:
33465 case V8HI_FTYPE_V16QI:
33466 case V8QI_FTYPE_V8QI:
33467 case V8SF_FTYPE_V8SF:
33468 case V8SF_FTYPE_V8SI:
33469 case V8SF_FTYPE_V4SF:
33470 case V8SF_FTYPE_V8HI:
33471 case V4SI_FTYPE_V4SI:
33472 case V4SI_FTYPE_V16QI:
33473 case V4SI_FTYPE_V4SF:
33474 case V4SI_FTYPE_V8SI:
33475 case V4SI_FTYPE_V8HI:
33476 case V4SI_FTYPE_V4DF:
33477 case V4SI_FTYPE_V2DF:
33478 case V4HI_FTYPE_V4HI:
33479 case V4DF_FTYPE_V4DF:
33480 case V4DF_FTYPE_V4SI:
33481 case V4DF_FTYPE_V4SF:
33482 case V4DF_FTYPE_V2DF:
33483 case V4SF_FTYPE_V4SF:
33484 case V4SF_FTYPE_V4SI:
33485 case V4SF_FTYPE_V8SF:
33486 case V4SF_FTYPE_V4DF:
33487 case V4SF_FTYPE_V8HI:
33488 case V4SF_FTYPE_V2DF:
33489 case V2DI_FTYPE_V2DI:
33490 case V2DI_FTYPE_V16QI:
33491 case V2DI_FTYPE_V8HI:
33492 case V2DI_FTYPE_V4SI:
33493 case V2DF_FTYPE_V2DF:
33494 case V2DF_FTYPE_V4SI:
33495 case V2DF_FTYPE_V4DF:
33496 case V2DF_FTYPE_V4SF:
33497 case V2DF_FTYPE_V2SI:
33498 case V2SI_FTYPE_V2SI:
33499 case V2SI_FTYPE_V4SF:
33500 case V2SI_FTYPE_V2SF:
33501 case V2SI_FTYPE_V2DF:
33502 case V2SF_FTYPE_V2SF:
33503 case V2SF_FTYPE_V2SI:
33504 case V32QI_FTYPE_V32QI:
33505 case V32QI_FTYPE_V16QI:
33506 case V16HI_FTYPE_V16HI:
33507 case V16HI_FTYPE_V8HI:
33508 case V8SI_FTYPE_V8SI:
33509 case V16HI_FTYPE_V16QI:
33510 case V8SI_FTYPE_V16QI:
33511 case V4DI_FTYPE_V16QI:
33512 case V8SI_FTYPE_V8HI:
33513 case V4DI_FTYPE_V8HI:
33514 case V4DI_FTYPE_V4SI:
33515 case V4DI_FTYPE_V2DI:
33516 case UQI_FTYPE_UQI:
33517 case UHI_FTYPE_UHI:
33518 case USI_FTYPE_USI:
33519 case USI_FTYPE_UQI:
33520 case USI_FTYPE_UHI:
33521 case UDI_FTYPE_UDI:
33522 case UHI_FTYPE_V16QI:
33523 case USI_FTYPE_V32QI:
33524 case UDI_FTYPE_V64QI:
33525 case V16QI_FTYPE_UHI:
33526 case V32QI_FTYPE_USI:
33527 case V64QI_FTYPE_UDI:
33528 case V8HI_FTYPE_UQI:
33529 case V16HI_FTYPE_UHI:
33530 case V32HI_FTYPE_USI:
33531 case V4SI_FTYPE_UQI:
33532 case V8SI_FTYPE_UQI:
33533 case V4SI_FTYPE_UHI:
33534 case V8SI_FTYPE_UHI:
33535 case UQI_FTYPE_V8HI:
33536 case UHI_FTYPE_V16HI:
33537 case USI_FTYPE_V32HI:
33538 case UQI_FTYPE_V4SI:
33539 case UQI_FTYPE_V8SI:
33540 case UHI_FTYPE_V16SI:
33541 case UQI_FTYPE_V2DI:
33542 case UQI_FTYPE_V4DI:
33543 case UQI_FTYPE_V8DI:
33544 case V16SI_FTYPE_UHI:
33545 case V2DI_FTYPE_UQI:
33546 case V4DI_FTYPE_UQI:
33547 case V16SI_FTYPE_INT:
33548 case V16SF_FTYPE_V8SF:
33549 case V16SI_FTYPE_V8SI:
33550 case V16SF_FTYPE_V4SF:
33551 case V16SI_FTYPE_V4SI:
33552 case V16SI_FTYPE_V16SF:
33553 case V16SI_FTYPE_V16SI:
33554 case V16SF_FTYPE_V16SF:
33555 case V8DI_FTYPE_UQI:
33556 case V8DI_FTYPE_V8DI:
33557 case V8DF_FTYPE_V4DF:
33558 case V8DF_FTYPE_V2DF:
33559 case V8DF_FTYPE_V8DF:
33560 nargs = 1;
33561 break;
33562 case V4SF_FTYPE_V4SF_VEC_MERGE:
33563 case V2DF_FTYPE_V2DF_VEC_MERGE:
33564 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33565 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33566 case V16QI_FTYPE_V16QI_V16QI:
33567 case V16QI_FTYPE_V8HI_V8HI:
33568 case V16SF_FTYPE_V16SF_V16SF:
33569 case V8QI_FTYPE_V8QI_V8QI:
33570 case V8QI_FTYPE_V4HI_V4HI:
33571 case V8HI_FTYPE_V8HI_V8HI:
33572 case V8HI_FTYPE_V16QI_V16QI:
33573 case V8HI_FTYPE_V4SI_V4SI:
33574 case V8SF_FTYPE_V8SF_V8SF:
33575 case V8SF_FTYPE_V8SF_V8SI:
33576 case V8DF_FTYPE_V8DF_V8DF:
33577 case V4SI_FTYPE_V4SI_V4SI:
33578 case V4SI_FTYPE_V8HI_V8HI:
33579 case V4SI_FTYPE_V2DF_V2DF:
33580 case V4HI_FTYPE_V4HI_V4HI:
33581 case V4HI_FTYPE_V8QI_V8QI:
33582 case V4HI_FTYPE_V2SI_V2SI:
33583 case V4DF_FTYPE_V4DF_V4DF:
33584 case V4DF_FTYPE_V4DF_V4DI:
33585 case V4SF_FTYPE_V4SF_V4SF:
33586 case V4SF_FTYPE_V4SF_V4SI:
33587 case V4SF_FTYPE_V4SF_V2SI:
33588 case V4SF_FTYPE_V4SF_V2DF:
33589 case V4SF_FTYPE_V4SF_UINT:
33590 case V4SF_FTYPE_V4SF_DI:
33591 case V4SF_FTYPE_V4SF_SI:
33592 case V2DI_FTYPE_V2DI_V2DI:
33593 case V2DI_FTYPE_V16QI_V16QI:
33594 case V2DI_FTYPE_V4SI_V4SI:
33595 case V2DI_FTYPE_V2DI_V16QI:
33596 case V2SI_FTYPE_V2SI_V2SI:
33597 case V2SI_FTYPE_V4HI_V4HI:
33598 case V2SI_FTYPE_V2SF_V2SF:
33599 case V2DF_FTYPE_V2DF_V2DF:
33600 case V2DF_FTYPE_V2DF_V4SF:
33601 case V2DF_FTYPE_V2DF_V2DI:
33602 case V2DF_FTYPE_V2DF_DI:
33603 case V2DF_FTYPE_V2DF_SI:
33604 case V2DF_FTYPE_V2DF_UINT:
33605 case V2SF_FTYPE_V2SF_V2SF:
33606 case V1DI_FTYPE_V1DI_V1DI:
33607 case V1DI_FTYPE_V8QI_V8QI:
33608 case V1DI_FTYPE_V2SI_V2SI:
33609 case V32QI_FTYPE_V16HI_V16HI:
33610 case V16HI_FTYPE_V8SI_V8SI:
33611 case V64QI_FTYPE_V64QI_V64QI:
33612 case V32QI_FTYPE_V32QI_V32QI:
33613 case V16HI_FTYPE_V32QI_V32QI:
33614 case V16HI_FTYPE_V16HI_V16HI:
33615 case V8SI_FTYPE_V4DF_V4DF:
33616 case V8SI_FTYPE_V8SI_V8SI:
33617 case V8SI_FTYPE_V16HI_V16HI:
33618 case V4DI_FTYPE_V4DI_V4DI:
33619 case V4DI_FTYPE_V8SI_V8SI:
33620 case V8DI_FTYPE_V64QI_V64QI:
33621 if (comparison == UNKNOWN)
33622 return ix86_expand_binop_builtin (icode, exp, target);
33623 nargs = 2;
33624 break;
33625 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33626 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33627 gcc_assert (comparison != UNKNOWN);
33628 nargs = 2;
33629 swap = true;
33630 break;
33631 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33632 case V16HI_FTYPE_V16HI_SI_COUNT:
33633 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33634 case V8SI_FTYPE_V8SI_SI_COUNT:
33635 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33636 case V4DI_FTYPE_V4DI_INT_COUNT:
33637 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33638 case V8HI_FTYPE_V8HI_SI_COUNT:
33639 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33640 case V4SI_FTYPE_V4SI_SI_COUNT:
33641 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33642 case V4HI_FTYPE_V4HI_SI_COUNT:
33643 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33644 case V2DI_FTYPE_V2DI_SI_COUNT:
33645 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33646 case V2SI_FTYPE_V2SI_SI_COUNT:
33647 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33648 case V1DI_FTYPE_V1DI_SI_COUNT:
33649 nargs = 2;
33650 second_arg_count = true;
33651 break;
33652 case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
33653 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
33654 case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
33655 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
33656 case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
33657 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
33658 case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
33659 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
33660 case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
33661 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
33662 case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
33663 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
33664 case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
33665 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
33666 case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
33667 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
33668 case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
33669 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
33670 nargs = 4;
33671 second_arg_count = true;
33672 break;
33673 case UINT64_FTYPE_UINT64_UINT64:
33674 case UINT_FTYPE_UINT_UINT:
33675 case UINT_FTYPE_UINT_USHORT:
33676 case UINT_FTYPE_UINT_UCHAR:
33677 case UINT16_FTYPE_UINT16_INT:
33678 case UINT8_FTYPE_UINT8_INT:
33679 case UQI_FTYPE_UQI_UQI:
33680 case UHI_FTYPE_UHI_UHI:
33681 case USI_FTYPE_USI_USI:
33682 case UDI_FTYPE_UDI_UDI:
33683 case V16SI_FTYPE_V8DF_V8DF:
33684 nargs = 2;
33685 break;
33686 case V2DI_FTYPE_V2DI_INT_CONVERT:
33687 nargs = 2;
33688 rmode = V1TImode;
33689 nargs_constant = 1;
33690 break;
33691 case V4DI_FTYPE_V4DI_INT_CONVERT:
33692 nargs = 2;
33693 rmode = V2TImode;
33694 nargs_constant = 1;
33695 break;
33696 case V8DI_FTYPE_V8DI_INT_CONVERT:
33697 nargs = 2;
33698 rmode = V4TImode;
33699 nargs_constant = 1;
33700 break;
33701 case V8HI_FTYPE_V8HI_INT:
33702 case V8HI_FTYPE_V8SF_INT:
33703 case V16HI_FTYPE_V16SF_INT:
33704 case V8HI_FTYPE_V4SF_INT:
33705 case V8SF_FTYPE_V8SF_INT:
33706 case V4SF_FTYPE_V16SF_INT:
33707 case V16SF_FTYPE_V16SF_INT:
33708 case V4SI_FTYPE_V4SI_INT:
33709 case V4SI_FTYPE_V8SI_INT:
33710 case V4HI_FTYPE_V4HI_INT:
33711 case V4DF_FTYPE_V4DF_INT:
33712 case V4DF_FTYPE_V8DF_INT:
33713 case V4SF_FTYPE_V4SF_INT:
33714 case V4SF_FTYPE_V8SF_INT:
33715 case V2DI_FTYPE_V2DI_INT:
33716 case V2DF_FTYPE_V2DF_INT:
33717 case V2DF_FTYPE_V4DF_INT:
33718 case V16HI_FTYPE_V16HI_INT:
33719 case V8SI_FTYPE_V8SI_INT:
33720 case V16SI_FTYPE_V16SI_INT:
33721 case V4SI_FTYPE_V16SI_INT:
33722 case V4DI_FTYPE_V4DI_INT:
33723 case V2DI_FTYPE_V4DI_INT:
33724 case V4DI_FTYPE_V8DI_INT:
33725 case QI_FTYPE_V4SF_INT:
33726 case QI_FTYPE_V2DF_INT:
33727 case UQI_FTYPE_UQI_UQI_CONST:
33728 case UHI_FTYPE_UHI_UQI:
33729 case USI_FTYPE_USI_UQI:
33730 case UDI_FTYPE_UDI_UQI:
33731 nargs = 2;
33732 nargs_constant = 1;
33733 break;
33734 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33735 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33736 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33737 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33738 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33739 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33740 case UHI_FTYPE_V16SI_V16SI_UHI:
33741 case UQI_FTYPE_V8DI_V8DI_UQI:
33742 case V16HI_FTYPE_V16SI_V16HI_UHI:
33743 case V16QI_FTYPE_V16SI_V16QI_UHI:
33744 case V16QI_FTYPE_V8DI_V16QI_UQI:
33745 case V16SF_FTYPE_V16SF_V16SF_UHI:
33746 case V16SF_FTYPE_V4SF_V16SF_UHI:
33747 case V16SI_FTYPE_SI_V16SI_UHI:
33748 case V16SI_FTYPE_V16HI_V16SI_UHI:
33749 case V16SI_FTYPE_V16QI_V16SI_UHI:
33750 case V8SF_FTYPE_V4SF_V8SF_UQI:
33751 case V4DF_FTYPE_V2DF_V4DF_UQI:
33752 case V8SI_FTYPE_V4SI_V8SI_UQI:
33753 case V8SI_FTYPE_SI_V8SI_UQI:
33754 case V4SI_FTYPE_V4SI_V4SI_UQI:
33755 case V4SI_FTYPE_SI_V4SI_UQI:
33756 case V4DI_FTYPE_V2DI_V4DI_UQI:
33757 case V4DI_FTYPE_DI_V4DI_UQI:
33758 case V2DI_FTYPE_V2DI_V2DI_UQI:
33759 case V2DI_FTYPE_DI_V2DI_UQI:
33760 case V64QI_FTYPE_V64QI_V64QI_UDI:
33761 case V64QI_FTYPE_V16QI_V64QI_UDI:
33762 case V64QI_FTYPE_QI_V64QI_UDI:
33763 case V32QI_FTYPE_V32QI_V32QI_USI:
33764 case V32QI_FTYPE_V16QI_V32QI_USI:
33765 case V32QI_FTYPE_QI_V32QI_USI:
33766 case V16QI_FTYPE_V16QI_V16QI_UHI:
33767 case V16QI_FTYPE_QI_V16QI_UHI:
33768 case V32HI_FTYPE_V8HI_V32HI_USI:
33769 case V32HI_FTYPE_HI_V32HI_USI:
33770 case V16HI_FTYPE_V8HI_V16HI_UHI:
33771 case V16HI_FTYPE_HI_V16HI_UHI:
33772 case V8HI_FTYPE_V8HI_V8HI_UQI:
33773 case V8HI_FTYPE_HI_V8HI_UQI:
33774 case V8SF_FTYPE_V8HI_V8SF_UQI:
33775 case V4SF_FTYPE_V8HI_V4SF_UQI:
33776 case V8SI_FTYPE_V8SF_V8SI_UQI:
33777 case V4SI_FTYPE_V4SF_V4SI_UQI:
33778 case V4DI_FTYPE_V4SF_V4DI_UQI:
33779 case V2DI_FTYPE_V4SF_V2DI_UQI:
33780 case V4SF_FTYPE_V4DI_V4SF_UQI:
33781 case V4SF_FTYPE_V2DI_V4SF_UQI:
33782 case V4DF_FTYPE_V4DI_V4DF_UQI:
33783 case V2DF_FTYPE_V2DI_V2DF_UQI:
33784 case V16QI_FTYPE_V8HI_V16QI_UQI:
33785 case V16QI_FTYPE_V16HI_V16QI_UHI:
33786 case V16QI_FTYPE_V4SI_V16QI_UQI:
33787 case V16QI_FTYPE_V8SI_V16QI_UQI:
33788 case V8HI_FTYPE_V4SI_V8HI_UQI:
33789 case V8HI_FTYPE_V8SI_V8HI_UQI:
33790 case V16QI_FTYPE_V2DI_V16QI_UQI:
33791 case V16QI_FTYPE_V4DI_V16QI_UQI:
33792 case V8HI_FTYPE_V2DI_V8HI_UQI:
33793 case V8HI_FTYPE_V4DI_V8HI_UQI:
33794 case V4SI_FTYPE_V2DI_V4SI_UQI:
33795 case V4SI_FTYPE_V4DI_V4SI_UQI:
33796 case V32QI_FTYPE_V32HI_V32QI_USI:
33797 case UHI_FTYPE_V16QI_V16QI_UHI:
33798 case USI_FTYPE_V32QI_V32QI_USI:
33799 case UDI_FTYPE_V64QI_V64QI_UDI:
33800 case UQI_FTYPE_V8HI_V8HI_UQI:
33801 case UHI_FTYPE_V16HI_V16HI_UHI:
33802 case USI_FTYPE_V32HI_V32HI_USI:
33803 case UQI_FTYPE_V4SI_V4SI_UQI:
33804 case UQI_FTYPE_V8SI_V8SI_UQI:
33805 case UQI_FTYPE_V2DI_V2DI_UQI:
33806 case UQI_FTYPE_V4DI_V4DI_UQI:
33807 case V4SF_FTYPE_V2DF_V4SF_UQI:
33808 case V4SF_FTYPE_V4DF_V4SF_UQI:
33809 case V16SI_FTYPE_V16SI_V16SI_UHI:
33810 case V16SI_FTYPE_V4SI_V16SI_UHI:
33811 case V2DI_FTYPE_V4SI_V2DI_UQI:
33812 case V2DI_FTYPE_V8HI_V2DI_UQI:
33813 case V2DI_FTYPE_V16QI_V2DI_UQI:
33814 case V4DI_FTYPE_V4DI_V4DI_UQI:
33815 case V4DI_FTYPE_V4SI_V4DI_UQI:
33816 case V4DI_FTYPE_V8HI_V4DI_UQI:
33817 case V4DI_FTYPE_V16QI_V4DI_UQI:
33818 case V4DI_FTYPE_V4DF_V4DI_UQI:
33819 case V2DI_FTYPE_V2DF_V2DI_UQI:
33820 case V4SI_FTYPE_V4DF_V4SI_UQI:
33821 case V4SI_FTYPE_V2DF_V4SI_UQI:
33822 case V4SI_FTYPE_V8HI_V4SI_UQI:
33823 case V4SI_FTYPE_V16QI_V4SI_UQI:
33824 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33825 case V8DF_FTYPE_V2DF_V8DF_UQI:
33826 case V8DF_FTYPE_V4DF_V8DF_UQI:
33827 case V8DF_FTYPE_V8DF_V8DF_UQI:
33828 case V8SF_FTYPE_V8SF_V8SF_UQI:
33829 case V8SF_FTYPE_V8SI_V8SF_UQI:
33830 case V4DF_FTYPE_V4DF_V4DF_UQI:
33831 case V4SF_FTYPE_V4SF_V4SF_UQI:
33832 case V2DF_FTYPE_V2DF_V2DF_UQI:
33833 case V2DF_FTYPE_V4SF_V2DF_UQI:
33834 case V2DF_FTYPE_V4SI_V2DF_UQI:
33835 case V4SF_FTYPE_V4SI_V4SF_UQI:
33836 case V4DF_FTYPE_V4SF_V4DF_UQI:
33837 case V4DF_FTYPE_V4SI_V4DF_UQI:
33838 case V8SI_FTYPE_V8SI_V8SI_UQI:
33839 case V8SI_FTYPE_V8HI_V8SI_UQI:
33840 case V8SI_FTYPE_V16QI_V8SI_UQI:
33841 case V8DF_FTYPE_V8SI_V8DF_UQI:
33842 case V8DI_FTYPE_DI_V8DI_UQI:
33843 case V16SF_FTYPE_V8SF_V16SF_UHI:
33844 case V16SI_FTYPE_V8SI_V16SI_UHI:
33845 case V16HI_FTYPE_V16HI_V16HI_UHI:
33846 case V8HI_FTYPE_V16QI_V8HI_UQI:
33847 case V16HI_FTYPE_V16QI_V16HI_UHI:
33848 case V32HI_FTYPE_V32HI_V32HI_USI:
33849 case V32HI_FTYPE_V32QI_V32HI_USI:
33850 case V8DI_FTYPE_V16QI_V8DI_UQI:
33851 case V8DI_FTYPE_V2DI_V8DI_UQI:
33852 case V8DI_FTYPE_V4DI_V8DI_UQI:
33853 case V8DI_FTYPE_V8DI_V8DI_UQI:
33854 case V8DI_FTYPE_V8HI_V8DI_UQI:
33855 case V8DI_FTYPE_V8SI_V8DI_UQI:
33856 case V8HI_FTYPE_V8DI_V8HI_UQI:
33857 case V8SI_FTYPE_V8DI_V8SI_UQI:
33858 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33859 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33860 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33861 case V32HI_FTYPE_V32HI_V32HI_V32HI:
33862 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33863 case V16HI_FTYPE_V16HI_V16HI_V16HI:
33864 case V8SI_FTYPE_V8SI_V8SI_V8SI:
33865 case V8HI_FTYPE_V8HI_V8HI_V8HI:
33866 nargs = 3;
33867 break;
33868 case V32QI_FTYPE_V32QI_V32QI_INT:
33869 case V16HI_FTYPE_V16HI_V16HI_INT:
33870 case V16QI_FTYPE_V16QI_V16QI_INT:
33871 case V4DI_FTYPE_V4DI_V4DI_INT:
33872 case V8HI_FTYPE_V8HI_V8HI_INT:
33873 case V8SI_FTYPE_V8SI_V8SI_INT:
33874 case V8SI_FTYPE_V8SI_V4SI_INT:
33875 case V8SF_FTYPE_V8SF_V8SF_INT:
33876 case V8SF_FTYPE_V8SF_V4SF_INT:
33877 case V4SI_FTYPE_V4SI_V4SI_INT:
33878 case V4DF_FTYPE_V4DF_V4DF_INT:
33879 case V16SF_FTYPE_V16SF_V16SF_INT:
33880 case V16SF_FTYPE_V16SF_V4SF_INT:
33881 case V16SI_FTYPE_V16SI_V4SI_INT:
33882 case V4DF_FTYPE_V4DF_V2DF_INT:
33883 case V4SF_FTYPE_V4SF_V4SF_INT:
33884 case V2DI_FTYPE_V2DI_V2DI_INT:
33885 case V4DI_FTYPE_V4DI_V2DI_INT:
33886 case V2DF_FTYPE_V2DF_V2DF_INT:
33887 case UQI_FTYPE_V8DI_V8UDI_INT:
33888 case UQI_FTYPE_V8DF_V8DF_INT:
33889 case UQI_FTYPE_V2DF_V2DF_INT:
33890 case UQI_FTYPE_V4SF_V4SF_INT:
33891 case UHI_FTYPE_V16SI_V16SI_INT:
33892 case UHI_FTYPE_V16SF_V16SF_INT:
33893 case V64QI_FTYPE_V64QI_V64QI_INT:
33894 case V32HI_FTYPE_V32HI_V32HI_INT:
33895 case V16SI_FTYPE_V16SI_V16SI_INT:
33896 case V8DI_FTYPE_V8DI_V8DI_INT:
33897 nargs = 3;
33898 nargs_constant = 1;
33899 break;
33900 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33901 nargs = 3;
33902 rmode = V4DImode;
33903 nargs_constant = 1;
33904 break;
33905 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33906 nargs = 3;
33907 rmode = V2DImode;
33908 nargs_constant = 1;
33909 break;
33910 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33911 nargs = 3;
33912 rmode = DImode;
33913 nargs_constant = 1;
33914 break;
33915 case V2DI_FTYPE_V2DI_UINT_UINT:
33916 nargs = 3;
33917 nargs_constant = 2;
33918 break;
33919 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
33920 nargs = 3;
33921 rmode = V8DImode;
33922 nargs_constant = 1;
33923 break;
33924 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
33925 nargs = 5;
33926 rmode = V8DImode;
33927 mask_pos = 2;
33928 nargs_constant = 1;
33929 break;
33930 case QI_FTYPE_V8DF_INT_UQI:
33931 case QI_FTYPE_V4DF_INT_UQI:
33932 case QI_FTYPE_V2DF_INT_UQI:
33933 case HI_FTYPE_V16SF_INT_UHI:
33934 case QI_FTYPE_V8SF_INT_UQI:
33935 case QI_FTYPE_V4SF_INT_UQI:
33936 nargs = 3;
33937 mask_pos = 1;
33938 nargs_constant = 1;
33939 break;
33940 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
33941 nargs = 5;
33942 rmode = V4DImode;
33943 mask_pos = 2;
33944 nargs_constant = 1;
33945 break;
33946 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
33947 nargs = 5;
33948 rmode = V2DImode;
33949 mask_pos = 2;
33950 nargs_constant = 1;
33951 break;
33952 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
33953 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
33954 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
33955 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
33956 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
33957 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
33958 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
33959 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
33960 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
33961 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
33962 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
33963 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
33964 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
33965 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
33966 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
33967 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
33968 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
33969 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
33970 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
33971 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
33972 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
33973 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
33974 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
33975 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
33976 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
33977 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
33978 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
33979 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
33980 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
33981 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
33982 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
33983 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
33984 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
33985 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
33986 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
33987 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
33988 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
33989 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
33990 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
33991 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
33992 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
33993 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
33994 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
33995 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
33996 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
33997 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
33998 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
33999 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
34000 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
34001 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
34002 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
34003 nargs = 4;
34004 break;
34005 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
34006 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
34007 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
34008 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
34009 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
34010 nargs = 4;
34011 nargs_constant = 1;
34012 break;
34013 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
34014 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
34015 case QI_FTYPE_V4DF_V4DF_INT_UQI:
34016 case QI_FTYPE_V8SF_V8SF_INT_UQI:
34017 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
34018 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
34019 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
34020 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
34021 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
34022 case USI_FTYPE_V32QI_V32QI_INT_USI:
34023 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
34024 case USI_FTYPE_V32HI_V32HI_INT_USI:
34025 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
34026 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
34027 case V32HI_FTYPE_V32HI_V32HI_V32HI_INT:
34028 case V16HI_FTYPE_V16HI_V16HI_V16HI_INT:
34029 case V8HI_FTYPE_V8HI_V8HI_V8HI_INT:
34030 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT:
34031 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT:
34032 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT:
34033 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT:
34034 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT:
34035 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT:
34036 nargs = 4;
34037 mask_pos = 1;
34038 nargs_constant = 1;
34039 break;
34040 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
34041 nargs = 4;
34042 nargs_constant = 2;
34043 break;
34044 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
34045 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
34046 nargs = 4;
34047 break;
34048 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
34049 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
34050 mask_pos = 1;
34051 nargs = 4;
34052 nargs_constant = 1;
34053 break;
34054 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
34055 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
34056 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
34057 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
34058 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
34059 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
34060 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
34061 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
34062 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
34063 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
34064 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
34065 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
34066 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
34067 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
34068 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
34069 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
34070 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
34071 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
34072 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
34073 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
34074 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
34075 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
34076 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
34077 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
34078 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
34079 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
34080 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
34081 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
34082 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
34083 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
34084 nargs = 4;
34085 mask_pos = 2;
34086 nargs_constant = 1;
34087 break;
34088 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
34089 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
34090 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
34091 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
34092 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
34093 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
34094 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
34095 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
34096 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
34097 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
34098 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
34099 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
34100 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
34101 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
34102 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
34103 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
34104 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
34105 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
34106 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
34107 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
34108 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
34109 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
34110 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
34111 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
34112 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
34113 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
34114 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
34115 nargs = 5;
34116 mask_pos = 2;
34117 nargs_constant = 1;
34118 break;
34119 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
34120 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
34121 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
34122 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
34123 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
34124 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
34125 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
34126 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
34127 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
34128 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
34129 nargs = 5;
34130 mask_pos = 1;
34131 nargs_constant = 1;
34132 break;
34133 case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
34134 case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
34135 case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
34136 case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT:
34137 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT:
34138 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT:
34139 case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT:
34140 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT:
34141 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT:
34142 case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
34143 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
34144 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
34145 nargs = 5;
34146 mask_pos = 1;
34147 nargs_constant = 2;
34148 break;
34149
34150 default:
34151 gcc_unreachable ();
34152 }
34153
34154 gcc_assert (nargs <= ARRAY_SIZE (args));
34155
34156 if (comparison != UNKNOWN)
34157 {
34158 gcc_assert (nargs == 2);
34159 return ix86_expand_sse_compare (d, exp, target, swap);
34160 }
34161
34162 if (rmode == VOIDmode || rmode == tmode)
34163 {
34164 if (optimize
34165 || target == 0
34166 || GET_MODE (target) != tmode
34167 || !insn_p->operand[0].predicate (target, tmode))
34168 target = gen_reg_rtx (tmode);
34169 else if (memory_operand (target, tmode))
34170 num_memory++;
34171 real_target = target;
34172 }
34173 else
34174 {
34175 real_target = gen_reg_rtx (tmode);
34176 target = lowpart_subreg (rmode, real_target, tmode);
34177 }
34178
34179 for (i = 0; i < nargs; i++)
34180 {
34181 tree arg = CALL_EXPR_ARG (exp, i);
34182 rtx op = expand_normal (arg);
34183 machine_mode mode = insn_p->operand[i + 1].mode;
34184 bool match = insn_p->operand[i + 1].predicate (op, mode);
34185
34186 if (second_arg_count && i == 1)
34187 {
34188 /* SIMD shift insns take either an 8-bit immediate or a
34189 register as the count, but the builtin functions take an
34190 int as the count. If the count doesn't match the operand
34191 predicate, put it in a register. The instructions use a
34192 64-bit count; if op is only 32-bit, zero-extend it, since
34193 negative shift counts are undefined behavior and
34194 zero-extension is more efficient. */
34195 if (!match)
34196 {
34197 if (SCALAR_INT_MODE_P (GET_MODE (op)))
34198 op = convert_modes (mode, GET_MODE (op), op, 1);
34199 else
34200 op = lowpart_subreg (mode, op, GET_MODE (op));
34201 if (!insn_p->operand[i + 1].predicate (op, mode))
34202 op = copy_to_reg (op);
34203 }
34204 }
34205 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34206 || (!mask_pos && (nargs - i) <= nargs_constant))
34207 {
34208 if (!match)
34209 switch (icode)
34210 {
34211 case CODE_FOR_avx_vinsertf128v4di:
34212 case CODE_FOR_avx_vextractf128v4di:
34213 error ("the last argument must be a 1-bit immediate");
34214 return const0_rtx;
34215
34216 case CODE_FOR_avx512f_cmpv8di3_mask:
34217 case CODE_FOR_avx512f_cmpv16si3_mask:
34218 case CODE_FOR_avx512f_ucmpv8di3_mask:
34219 case CODE_FOR_avx512f_ucmpv16si3_mask:
34220 case CODE_FOR_avx512vl_cmpv4di3_mask:
34221 case CODE_FOR_avx512vl_cmpv8si3_mask:
34222 case CODE_FOR_avx512vl_ucmpv4di3_mask:
34223 case CODE_FOR_avx512vl_ucmpv8si3_mask:
34224 case CODE_FOR_avx512vl_cmpv2di3_mask:
34225 case CODE_FOR_avx512vl_cmpv4si3_mask:
34226 case CODE_FOR_avx512vl_ucmpv2di3_mask:
34227 case CODE_FOR_avx512vl_ucmpv4si3_mask:
34228 error ("the last argument must be a 3-bit immediate");
34229 return const0_rtx;
34230
34231 case CODE_FOR_sse4_1_roundsd:
34232 case CODE_FOR_sse4_1_roundss:
34233
34234 case CODE_FOR_sse4_1_roundpd:
34235 case CODE_FOR_sse4_1_roundps:
34236 case CODE_FOR_avx_roundpd256:
34237 case CODE_FOR_avx_roundps256:
34238
34239 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34240 case CODE_FOR_sse4_1_roundps_sfix:
34241 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34242 case CODE_FOR_avx_roundps_sfix256:
34243
34244 case CODE_FOR_sse4_1_blendps:
34245 case CODE_FOR_avx_blendpd256:
34246 case CODE_FOR_avx_vpermilv4df:
34247 case CODE_FOR_avx_vpermilv4df_mask:
34248 case CODE_FOR_avx512f_getmantv8df_mask:
34249 case CODE_FOR_avx512f_getmantv16sf_mask:
34250 case CODE_FOR_avx512vl_getmantv8sf_mask:
34251 case CODE_FOR_avx512vl_getmantv4df_mask:
34252 case CODE_FOR_avx512vl_getmantv4sf_mask:
34253 case CODE_FOR_avx512vl_getmantv2df_mask:
34254 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34255 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34256 case CODE_FOR_avx512dq_rangepv4df_mask:
34257 case CODE_FOR_avx512dq_rangepv8sf_mask:
34258 case CODE_FOR_avx512dq_rangepv2df_mask:
34259 case CODE_FOR_avx512dq_rangepv4sf_mask:
34260 case CODE_FOR_avx_shufpd256_mask:
34261 error ("the last argument must be a 4-bit immediate");
34262 return const0_rtx;
34263
34264 case CODE_FOR_sha1rnds4:
34265 case CODE_FOR_sse4_1_blendpd:
34266 case CODE_FOR_avx_vpermilv2df:
34267 case CODE_FOR_avx_vpermilv2df_mask:
34268 case CODE_FOR_xop_vpermil2v2df3:
34269 case CODE_FOR_xop_vpermil2v4sf3:
34270 case CODE_FOR_xop_vpermil2v4df3:
34271 case CODE_FOR_xop_vpermil2v8sf3:
34272 case CODE_FOR_avx512f_vinsertf32x4_mask:
34273 case CODE_FOR_avx512f_vinserti32x4_mask:
34274 case CODE_FOR_avx512f_vextractf32x4_mask:
34275 case CODE_FOR_avx512f_vextracti32x4_mask:
34276 case CODE_FOR_sse2_shufpd:
34277 case CODE_FOR_sse2_shufpd_mask:
34278 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34279 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34280 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34281 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34282 error ("the last argument must be a 2-bit immediate");
34283 return const0_rtx;
34284
34285 case CODE_FOR_avx_vextractf128v4df:
34286 case CODE_FOR_avx_vextractf128v8sf:
34287 case CODE_FOR_avx_vextractf128v8si:
34288 case CODE_FOR_avx_vinsertf128v4df:
34289 case CODE_FOR_avx_vinsertf128v8sf:
34290 case CODE_FOR_avx_vinsertf128v8si:
34291 case CODE_FOR_avx512f_vinsertf64x4_mask:
34292 case CODE_FOR_avx512f_vinserti64x4_mask:
34293 case CODE_FOR_avx512f_vextractf64x4_mask:
34294 case CODE_FOR_avx512f_vextracti64x4_mask:
34295 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34296 case CODE_FOR_avx512dq_vinserti32x8_mask:
34297 case CODE_FOR_avx512vl_vinsertv4df:
34298 case CODE_FOR_avx512vl_vinsertv4di:
34299 case CODE_FOR_avx512vl_vinsertv8sf:
34300 case CODE_FOR_avx512vl_vinsertv8si:
34301 error ("the last argument must be a 1-bit immediate");
34302 return const0_rtx;
34303
34304 case CODE_FOR_avx_vmcmpv2df3:
34305 case CODE_FOR_avx_vmcmpv4sf3:
34306 case CODE_FOR_avx_cmpv2df3:
34307 case CODE_FOR_avx_cmpv4sf3:
34308 case CODE_FOR_avx_cmpv4df3:
34309 case CODE_FOR_avx_cmpv8sf3:
34310 case CODE_FOR_avx512f_cmpv8df3_mask:
34311 case CODE_FOR_avx512f_cmpv16sf3_mask:
34312 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34313 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34314 error ("the last argument must be a 5-bit immediate");
34315 return const0_rtx;
34316
34317 default:
34318 switch (nargs_constant)
34319 {
34320 case 2:
34321 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34322 || (!mask_pos && (nargs - i) == nargs_constant))
34323 {
34324 error ("the next to last argument must be an 8-bit immediate");
34325 break;
34326 }
34327 /* FALLTHRU */
34328 case 1:
34329 error ("the last argument must be an 8-bit immediate");
34330 break;
34331 default:
34332 gcc_unreachable ();
34333 }
34334 return const0_rtx;
34335 }
34336 }
34337 else
34338 {
34339 if (VECTOR_MODE_P (mode))
34340 op = safe_vector_operand (op, mode);
34341
34342 /* If we aren't optimizing, only allow one memory operand to
34343 be generated. */
34344 if (memory_operand (op, mode))
34345 num_memory++;
34346
34347 op = fixup_modeless_constant (op, mode);
34348
34349 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34350 {
34351 if (optimize || !match || num_memory > 1)
34352 op = copy_to_mode_reg (mode, op);
34353 }
34354 else
34355 {
34356 op = copy_to_reg (op);
34357 op = lowpart_subreg (mode, op, GET_MODE (op));
34358 }
34359 }
34360
34361 args[i].op = op;
34362 args[i].mode = mode;
34363 }
34364
34365 switch (nargs)
34366 {
34367 case 1:
34368 pat = GEN_FCN (icode) (real_target, args[0].op);
34369 break;
34370 case 2:
34371 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34372 break;
34373 case 3:
34374 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34375 args[2].op);
34376 break;
34377 case 4:
34378 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34379 args[2].op, args[3].op);
34380 break;
34381 case 5:
34382 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34383 args[2].op, args[3].op, args[4].op);
34384 break;
34385 case 6:
34386 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34387 args[2].op, args[3].op, args[4].op,
34388 args[5].op);
34389 break;
34390 default:
34391 gcc_unreachable ();
34392 }
34393
34394 if (! pat)
34395 return 0;
34396
34397 emit_insn (pat);
34398 return target;
34399 }
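/* A user-level sketch (not part of this file) of the immediate checking
   done above.  Assuming <smmintrin.h> and -msse4.1, the mask operand of
   _mm_blend_ps must expand to a constant; a literal mask matches the
   operand predicate, while a runtime value reaches the !match path and
   triggers the "the last argument must be a 4-bit immediate" error for
   CODE_FOR_sse4_1_blendps:

     #include <smmintrin.h>

     __m128 blend_lo_pair (__m128 a, __m128 b)
     {
       return _mm_blend_ps (a, b, 0x3);
     }
*/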
34400
34401 /* Transform a pattern of the following layout:
34402 (set A
34403 (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
34404 into:
34405 (set A B)
34406 i.e. strip the embedded-rounding annotation from the SET source. */
34407
34408 static rtx
34409 ix86_erase_embedded_rounding (rtx pat)
34410 {
34411 if (GET_CODE (pat) == INSN)
34412 pat = PATTERN (pat);
34413
34414 gcc_assert (GET_CODE (pat) == SET);
34415 rtx src = SET_SRC (pat);
34416 gcc_assert (XVECLEN (src, 0) == 2);
34417 rtx p0 = XVECEXP (src, 0, 0);
34418 gcc_assert (GET_CODE (src) == UNSPEC
34419 && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
34420 rtx res = gen_rtx_SET (SET_DEST (pat), p0);
34421 return res;
34422 }
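/* For illustration, a concrete instance of the rewrite performed above
   (a sketch with abbreviated, made-up operands):

     (set (reg:V2DF 100)
          (unspec [(plus:V2DF (reg:V2DF 101) (reg:V2DF 102))
                   (const_int 4)] UNSPEC_EMBEDDED_ROUNDING))

   becomes

     (set (reg:V2DF 100)
          (plus:V2DF (reg:V2DF 101) (reg:V2DF 102)))

   i.e. only the first operand of the unspec is kept and the rounding
   immediate is dropped.  */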
34423
34424 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34425 with rounding. */
34426 static rtx
34427 ix86_expand_sse_comi_round (const struct builtin_description *d,
34428 tree exp, rtx target)
34429 {
34430 rtx pat, set_dst;
34431 tree arg0 = CALL_EXPR_ARG (exp, 0);
34432 tree arg1 = CALL_EXPR_ARG (exp, 1);
34433 tree arg2 = CALL_EXPR_ARG (exp, 2);
34434 tree arg3 = CALL_EXPR_ARG (exp, 3);
34435 rtx op0 = expand_normal (arg0);
34436 rtx op1 = expand_normal (arg1);
34437 rtx op2 = expand_normal (arg2);
34438 rtx op3 = expand_normal (arg3);
34439 enum insn_code icode = d->icode;
34440 const struct insn_data_d *insn_p = &insn_data[icode];
34441 machine_mode mode0 = insn_p->operand[0].mode;
34442 machine_mode mode1 = insn_p->operand[1].mode;
34443 enum rtx_code comparison = UNEQ;
34444 bool need_ucomi = false;
34445
34446 /* See avxintrin.h for values. */
34447 enum rtx_code comi_comparisons[32] =
34448 {
34449 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34450 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34451 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34452 };
34453 bool need_ucomi_values[32] =
34454 {
34455 true, false, false, true, true, false, false, true,
34456 true, false, false, true, true, false, false, true,
34457 false, true, true, false, false, true, true, false,
34458 false, true, true, false, false, true, true, false
34459 };
34460
34461 if (!CONST_INT_P (op2))
34462 {
34463 error ("the third argument must be a comparison constant");
34464 return const0_rtx;
34465 }
34466 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34467 {
34468 error ("incorrect comparison mode");
34469 return const0_rtx;
34470 }
34471
34472 if (!insn_p->operand[2].predicate (op3, SImode))
34473 {
34474 error ("incorrect rounding operand");
34475 return const0_rtx;
34476 }
34477
34478 comparison = comi_comparisons[INTVAL (op2)];
34479 need_ucomi = need_ucomi_values[INTVAL (op2)];
34480
34481 if (VECTOR_MODE_P (mode0))
34482 op0 = safe_vector_operand (op0, mode0);
34483 if (VECTOR_MODE_P (mode1))
34484 op1 = safe_vector_operand (op1, mode1);
34485
34486 target = gen_reg_rtx (SImode);
34487 emit_move_insn (target, const0_rtx);
34488 target = gen_rtx_SUBREG (QImode, target, 0);
34489
34490 if ((optimize && !register_operand (op0, mode0))
34491 || !insn_p->operand[0].predicate (op0, mode0))
34492 op0 = copy_to_mode_reg (mode0, op0);
34493 if ((optimize && !register_operand (op1, mode1))
34494 || !insn_p->operand[1].predicate (op1, mode1))
34495 op1 = copy_to_mode_reg (mode1, op1);
34496
34497 if (need_ucomi)
34498 icode = icode == CODE_FOR_sse_comi_round
34499 ? CODE_FOR_sse_ucomi_round
34500 : CODE_FOR_sse2_ucomi_round;
34501
34502 pat = GEN_FCN (icode) (op0, op1, op3);
34503 if (! pat)
34504 return 0;
34505
34506 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34507 if (INTVAL (op3) == NO_ROUND)
34508 {
34509 pat = ix86_erase_embedded_rounding (pat);
34510 if (! pat)
34511 return 0;
34512
34513 set_dst = SET_DEST (pat);
34514 }
34515 else
34516 {
34517 gcc_assert (GET_CODE (pat) == SET);
34518 set_dst = SET_DEST (pat);
34519 }
34520
34521 emit_insn (pat);
34522 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34523 gen_rtx_fmt_ee (comparison, QImode,
34524 set_dst,
34525 const0_rtx)));
34526
34527 return SUBREG_REG (target);
34528 }
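/* A user-level sketch of what lands here (assuming the _mm_comi_round_sd
   intrinsic from avx512fintrin.h and -mavx512f).  The predicate indexes
   the two tables above, e.g. _CMP_EQ_OQ (0) yields UNEQ with the quiet
   ucomi pattern and _CMP_LT_OS (1) yields GT with the signaling comi
   pattern; _MM_FROUND_CUR_DIRECTION (NO_ROUND) then takes the
   embedded-rounding erasure path:

     #include <immintrin.h>

     int lt_no_round (__m128d a, __m128d b)
     {
       return _mm_comi_round_sd (a, b, _CMP_LT_OS, _MM_FROUND_CUR_DIRECTION);
     }
*/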
34529
34530 static rtx
34531 ix86_expand_round_builtin (const struct builtin_description *d,
34532 tree exp, rtx target)
34533 {
34534 rtx pat;
34535 unsigned int i, nargs;
34536 struct
34537 {
34538 rtx op;
34539 machine_mode mode;
34540 } args[6];
34541 enum insn_code icode = d->icode;
34542 const struct insn_data_d *insn_p = &insn_data[icode];
34543 machine_mode tmode = insn_p->operand[0].mode;
34544 unsigned int nargs_constant = 0;
34545 unsigned int redundant_embed_rnd = 0;
34546
34547 switch ((enum ix86_builtin_func_type) d->flag)
34548 {
34549 case UINT64_FTYPE_V2DF_INT:
34550 case UINT64_FTYPE_V4SF_INT:
34551 case UINT_FTYPE_V2DF_INT:
34552 case UINT_FTYPE_V4SF_INT:
34553 case INT64_FTYPE_V2DF_INT:
34554 case INT64_FTYPE_V4SF_INT:
34555 case INT_FTYPE_V2DF_INT:
34556 case INT_FTYPE_V4SF_INT:
34557 nargs = 2;
34558 break;
34559 case V4SF_FTYPE_V4SF_UINT_INT:
34560 case V4SF_FTYPE_V4SF_UINT64_INT:
34561 case V2DF_FTYPE_V2DF_UINT64_INT:
34562 case V4SF_FTYPE_V4SF_INT_INT:
34563 case V4SF_FTYPE_V4SF_INT64_INT:
34564 case V2DF_FTYPE_V2DF_INT64_INT:
34565 case V4SF_FTYPE_V4SF_V4SF_INT:
34566 case V2DF_FTYPE_V2DF_V2DF_INT:
34567 case V4SF_FTYPE_V4SF_V2DF_INT:
34568 case V2DF_FTYPE_V2DF_V4SF_INT:
34569 nargs = 3;
34570 break;
34571 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34572 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34573 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34574 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
34575 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
34576 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
34577 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34578 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
34579 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34580 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34581 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34582 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34583 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34584 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34585 nargs = 4;
34586 break;
34587 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34588 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34589 nargs_constant = 2;
34590 nargs = 4;
34591 break;
34592 case INT_FTYPE_V4SF_V4SF_INT_INT:
34593 case INT_FTYPE_V2DF_V2DF_INT_INT:
34594 return ix86_expand_sse_comi_round (d, exp, target);
34595 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
34596 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
34597 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
34598 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34599 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34600 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34601 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34602 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34603 nargs = 5;
34604 break;
34605 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34606 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34607 nargs_constant = 4;
34608 nargs = 5;
34609 break;
34610 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
34611 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
34612 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
34613 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
34614 nargs_constant = 3;
34615 nargs = 5;
34616 break;
34617 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
34618 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
34619 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34620 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34621 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
34622 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
34623 nargs = 6;
34624 nargs_constant = 4;
34625 break;
34626 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34627 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34628 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34629 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34630 nargs = 6;
34631 nargs_constant = 3;
34632 break;
34633 default:
34634 gcc_unreachable ();
34635 }
34636 gcc_assert (nargs <= ARRAY_SIZE (args));
34637
34638 if (optimize
34639 || target == 0
34640 || GET_MODE (target) != tmode
34641 || !insn_p->operand[0].predicate (target, tmode))
34642 target = gen_reg_rtx (tmode);
34643
34644 for (i = 0; i < nargs; i++)
34645 {
34646 tree arg = CALL_EXPR_ARG (exp, i);
34647 rtx op = expand_normal (arg);
34648 machine_mode mode = insn_p->operand[i + 1].mode;
34649 bool match = insn_p->operand[i + 1].predicate (op, mode);
34650
34651 if (i == nargs - nargs_constant)
34652 {
34653 if (!match)
34654 {
34655 switch (icode)
34656 {
34657 case CODE_FOR_avx512f_getmantv8df_mask_round:
34658 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34659 case CODE_FOR_avx512f_vgetmantv2df_round:
34660 case CODE_FOR_avx512f_vgetmantv2df_mask_round:
34661 case CODE_FOR_avx512f_vgetmantv4sf_round:
34662 case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
34663 error ("the immediate argument must be a 4-bit immediate");
34664 return const0_rtx;
34665 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34666 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34667 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34668 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34669 error ("the immediate argument must be a 5-bit immediate");
34670 return const0_rtx;
34671 default:
34672 error ("the immediate argument must be an 8-bit immediate");
34673 return const0_rtx;
34674 }
34675 }
34676 }
34677 else if (i == nargs - 1)
34678 {
34679 if (!insn_p->operand[nargs].predicate (op, SImode))
34680 {
34681 error ("incorrect rounding operand");
34682 return const0_rtx;
34683 }
34684
34685 /* If there is no rounding, use the normal version of the pattern. */
34686 if (INTVAL (op) == NO_ROUND)
34687 redundant_embed_rnd = 1;
34688 }
34689 else
34690 {
34691 if (VECTOR_MODE_P (mode))
34692 op = safe_vector_operand (op, mode);
34693
34694 op = fixup_modeless_constant (op, mode);
34695
34696 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34697 {
34698 if (optimize || !match)
34699 op = copy_to_mode_reg (mode, op);
34700 }
34701 else
34702 {
34703 op = copy_to_reg (op);
34704 op = lowpart_subreg (mode, op, GET_MODE (op));
34705 }
34706 }
34707
34708 args[i].op = op;
34709 args[i].mode = mode;
34710 }
34711
34712 switch (nargs)
34713 {
34714 case 1:
34715 pat = GEN_FCN (icode) (target, args[0].op);
34716 break;
34717 case 2:
34718 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34719 break;
34720 case 3:
34721 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34722 args[2].op);
34723 break;
34724 case 4:
34725 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34726 args[2].op, args[3].op);
34727 break;
34728 case 5:
34729 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34730 args[2].op, args[3].op, args[4].op);
34731 break;
34732 case 6:
34733 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34734 args[2].op, args[3].op, args[4].op,
34735 args[5].op);
34736 break;
34737 default:
34738 gcc_unreachable ();
34739 }
34740
34741 if (!pat)
34742 return 0;
34743
34744 if (redundant_embed_rnd)
34745 pat = ix86_erase_embedded_rounding (pat);
34746
34747 emit_insn (pat);
34748 return target;
34749 }
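/* A user-level sketch of the embedded-rounding handling above (assuming
   the _mm_add_round_sd intrinsic from avx512fintrin.h and -mavx512f).
   An explicit rounding mode keeps the _round pattern, while
   _MM_FROUND_CUR_DIRECTION (NO_ROUND) takes the redundant_embed_rnd path
   and the rounding annotation is erased again:

     #include <immintrin.h>

     __m128d add_rz (__m128d a, __m128d b)
     {
       return _mm_add_round_sd (a, b,
                                _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
     }
*/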
34750
34751 /* Subroutine of ix86_expand_builtin to take care of special insns
34752 with variable number of operands. */
34753
34754 static rtx
34755 ix86_expand_special_args_builtin (const struct builtin_description *d,
34756 tree exp, rtx target)
34757 {
34758 tree arg;
34759 rtx pat, op;
34760 unsigned int i, nargs, arg_adjust, memory;
34761 bool aligned_mem = false;
34762 struct
34763 {
34764 rtx op;
34765 machine_mode mode;
34766 } args[3];
34767 enum insn_code icode = d->icode;
34768 bool last_arg_constant = false;
34769 const struct insn_data_d *insn_p = &insn_data[icode];
34770 machine_mode tmode = insn_p->operand[0].mode;
34771 enum { load, store } klass;
34772
34773 switch ((enum ix86_builtin_func_type) d->flag)
34774 {
34775 case VOID_FTYPE_VOID:
34776 emit_insn (GEN_FCN (icode) (target));
34777 return 0;
34778 case VOID_FTYPE_UINT64:
34779 case VOID_FTYPE_UNSIGNED:
34780 nargs = 0;
34781 klass = store;
34782 memory = 0;
34783 break;
34784
34785 case INT_FTYPE_VOID:
34786 case USHORT_FTYPE_VOID:
34787 case UINT64_FTYPE_VOID:
34788 case UNSIGNED_FTYPE_VOID:
34789 nargs = 0;
34790 klass = load;
34791 memory = 0;
34792 break;
34793 case UINT64_FTYPE_PUNSIGNED:
34794 case V2DI_FTYPE_PV2DI:
34795 case V4DI_FTYPE_PV4DI:
34796 case V32QI_FTYPE_PCCHAR:
34797 case V16QI_FTYPE_PCCHAR:
34798 case V8SF_FTYPE_PCV4SF:
34799 case V8SF_FTYPE_PCFLOAT:
34800 case V4SF_FTYPE_PCFLOAT:
34801 case V4DF_FTYPE_PCV2DF:
34802 case V4DF_FTYPE_PCDOUBLE:
34803 case V2DF_FTYPE_PCDOUBLE:
34804 case VOID_FTYPE_PVOID:
34805 case V8DI_FTYPE_PV8DI:
34806 nargs = 1;
34807 klass = load;
34808 memory = 0;
34809 switch (icode)
34810 {
34811 case CODE_FOR_sse4_1_movntdqa:
34812 case CODE_FOR_avx2_movntdqa:
34813 case CODE_FOR_avx512f_movntdqa:
34814 aligned_mem = true;
34815 break;
34816 default:
34817 break;
34818 }
34819 break;
34820 case VOID_FTYPE_PV2SF_V4SF:
34821 case VOID_FTYPE_PV8DI_V8DI:
34822 case VOID_FTYPE_PV4DI_V4DI:
34823 case VOID_FTYPE_PV2DI_V2DI:
34824 case VOID_FTYPE_PCHAR_V32QI:
34825 case VOID_FTYPE_PCHAR_V16QI:
34826 case VOID_FTYPE_PFLOAT_V16SF:
34827 case VOID_FTYPE_PFLOAT_V8SF:
34828 case VOID_FTYPE_PFLOAT_V4SF:
34829 case VOID_FTYPE_PDOUBLE_V8DF:
34830 case VOID_FTYPE_PDOUBLE_V4DF:
34831 case VOID_FTYPE_PDOUBLE_V2DF:
34832 case VOID_FTYPE_PLONGLONG_LONGLONG:
34833 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34834 case VOID_FTYPE_PINT_INT:
34835 nargs = 1;
34836 klass = store;
34837 /* Reserve memory operand for target. */
34838 memory = ARRAY_SIZE (args);
34839 switch (icode)
34840 {
34841 /* These builtins and instructions require the memory
34842 to be properly aligned. */
34843 case CODE_FOR_avx_movntv4di:
34844 case CODE_FOR_sse2_movntv2di:
34845 case CODE_FOR_avx_movntv8sf:
34846 case CODE_FOR_sse_movntv4sf:
34847 case CODE_FOR_sse4a_vmmovntv4sf:
34848 case CODE_FOR_avx_movntv4df:
34849 case CODE_FOR_sse2_movntv2df:
34850 case CODE_FOR_sse4a_vmmovntv2df:
34851 case CODE_FOR_sse2_movntidi:
34852 case CODE_FOR_sse_movntq:
34853 case CODE_FOR_sse2_movntisi:
34854 case CODE_FOR_avx512f_movntv16sf:
34855 case CODE_FOR_avx512f_movntv8df:
34856 case CODE_FOR_avx512f_movntv8di:
34857 aligned_mem = true;
34858 break;
34859 default:
34860 break;
34861 }
34862 break;
34863 case V4SF_FTYPE_V4SF_PCV2SF:
34864 case V2DF_FTYPE_V2DF_PCDOUBLE:
34865 nargs = 2;
34866 klass = load;
34867 memory = 1;
34868 break;
34869 case V8SF_FTYPE_PCV8SF_V8SI:
34870 case V4DF_FTYPE_PCV4DF_V4DI:
34871 case V4SF_FTYPE_PCV4SF_V4SI:
34872 case V2DF_FTYPE_PCV2DF_V2DI:
34873 case V8SI_FTYPE_PCV8SI_V8SI:
34874 case V4DI_FTYPE_PCV4DI_V4DI:
34875 case V4SI_FTYPE_PCV4SI_V4SI:
34876 case V2DI_FTYPE_PCV2DI_V2DI:
34877 case VOID_FTYPE_INT_INT64:
34878 nargs = 2;
34879 klass = load;
34880 memory = 0;
34881 break;
34882 case VOID_FTYPE_PV8DF_V8DF_UQI:
34883 case VOID_FTYPE_PV4DF_V4DF_UQI:
34884 case VOID_FTYPE_PV2DF_V2DF_UQI:
34885 case VOID_FTYPE_PV16SF_V16SF_UHI:
34886 case VOID_FTYPE_PV8SF_V8SF_UQI:
34887 case VOID_FTYPE_PV4SF_V4SF_UQI:
34888 case VOID_FTYPE_PV8DI_V8DI_UQI:
34889 case VOID_FTYPE_PV4DI_V4DI_UQI:
34890 case VOID_FTYPE_PV2DI_V2DI_UQI:
34891 case VOID_FTYPE_PV16SI_V16SI_UHI:
34892 case VOID_FTYPE_PV8SI_V8SI_UQI:
34893 case VOID_FTYPE_PV4SI_V4SI_UQI:
34894 case VOID_FTYPE_PV64QI_V64QI_UDI:
34895 case VOID_FTYPE_PV32HI_V32HI_USI:
34896 case VOID_FTYPE_PV32QI_V32QI_USI:
34897 case VOID_FTYPE_PV16QI_V16QI_UHI:
34898 case VOID_FTYPE_PV16HI_V16HI_UHI:
34899 case VOID_FTYPE_PV8HI_V8HI_UQI:
34900 switch (icode)
34901 {
34902 /* These builtins and instructions require the memory
34903 to be properly aligned. */
34904 case CODE_FOR_avx512f_storev16sf_mask:
34905 case CODE_FOR_avx512f_storev16si_mask:
34906 case CODE_FOR_avx512f_storev8df_mask:
34907 case CODE_FOR_avx512f_storev8di_mask:
34908 case CODE_FOR_avx512vl_storev8sf_mask:
34909 case CODE_FOR_avx512vl_storev8si_mask:
34910 case CODE_FOR_avx512vl_storev4df_mask:
34911 case CODE_FOR_avx512vl_storev4di_mask:
34912 case CODE_FOR_avx512vl_storev4sf_mask:
34913 case CODE_FOR_avx512vl_storev4si_mask:
34914 case CODE_FOR_avx512vl_storev2df_mask:
34915 case CODE_FOR_avx512vl_storev2di_mask:
34916 aligned_mem = true;
34917 break;
34918 default:
34919 break;
34920 }
34921 /* FALLTHRU */
34922 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34923 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34924 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34925 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34926 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34927 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34928 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34929 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34930 case VOID_FTYPE_PV8SI_V8DI_UQI:
34931 case VOID_FTYPE_PV8HI_V8DI_UQI:
34932 case VOID_FTYPE_PV16HI_V16SI_UHI:
34933 case VOID_FTYPE_PV16QI_V8DI_UQI:
34934 case VOID_FTYPE_PV16QI_V16SI_UHI:
34935 case VOID_FTYPE_PV4SI_V4DI_UQI:
34936 case VOID_FTYPE_PV4SI_V2DI_UQI:
34937 case VOID_FTYPE_PV8HI_V4DI_UQI:
34938 case VOID_FTYPE_PV8HI_V2DI_UQI:
34939 case VOID_FTYPE_PV8HI_V8SI_UQI:
34940 case VOID_FTYPE_PV8HI_V4SI_UQI:
34941 case VOID_FTYPE_PV16QI_V4DI_UQI:
34942 case VOID_FTYPE_PV16QI_V2DI_UQI:
34943 case VOID_FTYPE_PV16QI_V8SI_UQI:
34944 case VOID_FTYPE_PV16QI_V4SI_UQI:
34945 case VOID_FTYPE_PCHAR_V64QI_UDI:
34946 case VOID_FTYPE_PCHAR_V32QI_USI:
34947 case VOID_FTYPE_PCHAR_V16QI_UHI:
34948 case VOID_FTYPE_PSHORT_V32HI_USI:
34949 case VOID_FTYPE_PSHORT_V16HI_UHI:
34950 case VOID_FTYPE_PSHORT_V8HI_UQI:
34951 case VOID_FTYPE_PINT_V16SI_UHI:
34952 case VOID_FTYPE_PINT_V8SI_UQI:
34953 case VOID_FTYPE_PINT_V4SI_UQI:
34954 case VOID_FTYPE_PINT64_V8DI_UQI:
34955 case VOID_FTYPE_PINT64_V4DI_UQI:
34956 case VOID_FTYPE_PINT64_V2DI_UQI:
34957 case VOID_FTYPE_PDOUBLE_V8DF_UQI:
34958 case VOID_FTYPE_PDOUBLE_V4DF_UQI:
34959 case VOID_FTYPE_PDOUBLE_V2DF_UQI:
34960 case VOID_FTYPE_PFLOAT_V16SF_UHI:
34961 case VOID_FTYPE_PFLOAT_V8SF_UQI:
34962 case VOID_FTYPE_PFLOAT_V4SF_UQI:
34963 case VOID_FTYPE_PV32QI_V32HI_USI:
34964 case VOID_FTYPE_PV16QI_V16HI_UHI:
34965 case VOID_FTYPE_PV8QI_V8HI_UQI:
34966 nargs = 2;
34967 klass = store;
34968 /* Reserve memory operand for target. */
34969 memory = ARRAY_SIZE (args);
34970 break;
34971 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
34972 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
34973 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
34974 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
34975 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
34976 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
34977 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
34978 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
34979 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
34980 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
34981 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
34982 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
34983 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
34984 case V32HI_FTYPE_PCV32HI_V32HI_USI:
34985 case V32QI_FTYPE_PCV32QI_V32QI_USI:
34986 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
34987 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
34988 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
34989 switch (icode)
34990 {
34991 /* These builtins and instructions require the memory
34992 to be properly aligned. */
34993 case CODE_FOR_avx512f_loadv16sf_mask:
34994 case CODE_FOR_avx512f_loadv16si_mask:
34995 case CODE_FOR_avx512f_loadv8df_mask:
34996 case CODE_FOR_avx512f_loadv8di_mask:
34997 case CODE_FOR_avx512vl_loadv8sf_mask:
34998 case CODE_FOR_avx512vl_loadv8si_mask:
34999 case CODE_FOR_avx512vl_loadv4df_mask:
35000 case CODE_FOR_avx512vl_loadv4di_mask:
35001 case CODE_FOR_avx512vl_loadv4sf_mask:
35002 case CODE_FOR_avx512vl_loadv4si_mask:
35003 case CODE_FOR_avx512vl_loadv2df_mask:
35004 case CODE_FOR_avx512vl_loadv2di_mask:
35005 case CODE_FOR_avx512bw_loadv64qi_mask:
35006 case CODE_FOR_avx512vl_loadv32qi_mask:
35007 case CODE_FOR_avx512vl_loadv16qi_mask:
35008 case CODE_FOR_avx512bw_loadv32hi_mask:
35009 case CODE_FOR_avx512vl_loadv16hi_mask:
35010 case CODE_FOR_avx512vl_loadv8hi_mask:
35011 aligned_mem = true;
35012 break;
35013 default:
35014 break;
35015 }
35016 /* FALLTHRU */
35017 case V64QI_FTYPE_PCCHAR_V64QI_UDI:
35018 case V32QI_FTYPE_PCCHAR_V32QI_USI:
35019 case V16QI_FTYPE_PCCHAR_V16QI_UHI:
35020 case V32HI_FTYPE_PCSHORT_V32HI_USI:
35021 case V16HI_FTYPE_PCSHORT_V16HI_UHI:
35022 case V8HI_FTYPE_PCSHORT_V8HI_UQI:
35023 case V16SI_FTYPE_PCINT_V16SI_UHI:
35024 case V8SI_FTYPE_PCINT_V8SI_UQI:
35025 case V4SI_FTYPE_PCINT_V4SI_UQI:
35026 case V8DI_FTYPE_PCINT64_V8DI_UQI:
35027 case V4DI_FTYPE_PCINT64_V4DI_UQI:
35028 case V2DI_FTYPE_PCINT64_V2DI_UQI:
35029 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
35030 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
35031 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
35032 case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
35033 case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
35034 case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
35035 nargs = 3;
35036 klass = load;
35037 memory = 0;
35038 break;
35039 case VOID_FTYPE_UINT_UINT_UINT:
35040 case VOID_FTYPE_UINT64_UINT_UINT:
35041 case UCHAR_FTYPE_UINT_UINT_UINT:
35042 case UCHAR_FTYPE_UINT64_UINT_UINT:
35043 nargs = 3;
35044 klass = load;
35045 memory = ARRAY_SIZE (args);
35046 last_arg_constant = true;
35047 break;
35048 default:
35049 gcc_unreachable ();
35050 }
35051
35052 gcc_assert (nargs <= ARRAY_SIZE (args));
35053
35054 if (klass == store)
35055 {
35056 arg = CALL_EXPR_ARG (exp, 0);
35057 op = expand_normal (arg);
35058 gcc_assert (target == 0);
35059 if (memory)
35060 {
35061 op = ix86_zero_extend_to_Pmode (op);
35062 target = gen_rtx_MEM (tmode, op);
35063 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
35064 on it. Try to improve it using get_pointer_alignment,
35065 and if the special builtin is one that requires strict
35066 mode alignment, also from its GET_MODE_ALIGNMENT.
35067 Failure to do so could lead to ix86_legitimate_combined_insn
35068 rejecting all changes to such insns. */
35069 unsigned int align = get_pointer_alignment (arg);
35070 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
35071 align = GET_MODE_ALIGNMENT (tmode);
35072 if (MEM_ALIGN (target) < align)
35073 set_mem_align (target, align);
35074 }
35075 else
35076 target = force_reg (tmode, op);
35077 arg_adjust = 1;
35078 }
35079 else
35080 {
35081 arg_adjust = 0;
35082 if (optimize
35083 || target == 0
35084 || !register_operand (target, tmode)
35085 || GET_MODE (target) != tmode)
35086 target = gen_reg_rtx (tmode);
35087 }
35088
35089 for (i = 0; i < nargs; i++)
35090 {
35091 machine_mode mode = insn_p->operand[i + 1].mode;
35092 bool match;
35093
35094 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
35095 op = expand_normal (arg);
35096 match = insn_p->operand[i + 1].predicate (op, mode);
35097
35098 if (last_arg_constant && (i + 1) == nargs)
35099 {
35100 if (!match)
35101 {
35102 if (icode == CODE_FOR_lwp_lwpvalsi3
35103 || icode == CODE_FOR_lwp_lwpinssi3
35104 || icode == CODE_FOR_lwp_lwpvaldi3
35105 || icode == CODE_FOR_lwp_lwpinsdi3)
35106 error ("the last argument must be a 32-bit immediate");
35107 else
35108 error ("the last argument must be an 8-bit immediate");
35109 return const0_rtx;
35110 }
35111 }
35112 else
35113 {
35114 if (i == memory)
35115 {
35116 /* This must be the memory operand. */
35117 op = ix86_zero_extend_to_Pmode (op);
35118 op = gen_rtx_MEM (mode, op);
35119 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
35120 on it. Try to improve it using get_pointer_alignment,
35121 and if the special builtin is one that requires strict
35122 mode alignment, also from its GET_MODE_ALIGNMENT.
35123 Failure to do so could lead to ix86_legitimate_combined_insn
35124 rejecting all changes to such insns. */
35125 unsigned int align = get_pointer_alignment (arg);
35126 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
35127 align = GET_MODE_ALIGNMENT (mode);
35128 if (MEM_ALIGN (op) < align)
35129 set_mem_align (op, align);
35130 }
35131 else
35132 {
35133 /* This must be a register. */
35134 if (VECTOR_MODE_P (mode))
35135 op = safe_vector_operand (op, mode);
35136
35137 op = fixup_modeless_constant (op, mode);
35138
35139 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35140 op = copy_to_mode_reg (mode, op);
35141 else
35142 {
35143 op = copy_to_reg (op);
35144 op = lowpart_subreg (mode, op, GET_MODE (op));
35145 }
35146 }
35147 }
35148
35149 args[i].op = op;
35150 args[i].mode = mode;
35151 }
35152
35153 switch (nargs)
35154 {
35155 case 0:
35156 pat = GEN_FCN (icode) (target);
35157 break;
35158 case 1:
35159 pat = GEN_FCN (icode) (target, args[0].op);
35160 break;
35161 case 2:
35162 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35163 break;
35164 case 3:
35165 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35166 break;
35167 default:
35168 gcc_unreachable ();
35169 }
35170
35171 if (! pat)
35172 return 0;
35173 emit_insn (pat);
35174 return klass == store ? 0 : target;
35175 }
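/* A user-level sketch of the aligned_mem handling above (not part of this
   file).  Non-temporal store builtins are in the aligned group, so the MEM
   built for them gets the full mode alignment and the pointer passed in
   must honour it (32 bytes here), e.g. with -mavx:

     #include <immintrin.h>

     void stream_store (__m256i *dst, __m256i v)
     {
       _mm256_stream_si256 (dst, v);
     }
*/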
35176
35177 /* Return the integer constant in ARG. Constrain it to be in the range
35178 of the subparts of VEC_TYPE; issue an error if not. */
35179
35180 static int
35181 get_element_number (tree vec_type, tree arg)
35182 {
35183 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
35184
35185 if (!tree_fits_uhwi_p (arg)
35186 || (elt = tree_to_uhwi (arg), elt > max))
35187 {
35188 error ("selector must be an integer constant in the range 0..%wi", max);
35189 return 0;
35190 }
35191
35192 return elt;
35193 }
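/* For example, with a 4-element vector type such as __v4sf the valid
   selectors are 0..3 (a sketch; on error element 0 is used):

     __builtin_ia32_vec_ext_v4sf (x, 2)     accepted
     __builtin_ia32_vec_ext_v4sf (x, 7)     "selector must be an integer
                                             constant in the range 0..3"  */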
35194
35195 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35196 ix86_expand_vector_init. We DO have language-level syntax for this, in
35197 the form of (type){ init-list }. Except that since we can't place emms
35198 instructions from inside the compiler, we can't allow the use of MMX
35199 registers unless the user explicitly asks for it. So we do *not* define
35200 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
35201 we have builtins invoked by mmintrin.h that give us license to emit
35202 these sorts of instructions. */
35203
35204 static rtx
35205 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35206 {
35207 machine_mode tmode = TYPE_MODE (type);
35208 machine_mode inner_mode = GET_MODE_INNER (tmode);
35209 int i, n_elt = GET_MODE_NUNITS (tmode);
35210 rtvec v = rtvec_alloc (n_elt);
35211
35212 gcc_assert (VECTOR_MODE_P (tmode));
35213 gcc_assert (call_expr_nargs (exp) == n_elt);
35214
35215 for (i = 0; i < n_elt; ++i)
35216 {
35217 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35218 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35219 }
35220
35221 if (!target || !register_operand (target, tmode))
35222 target = gen_reg_rtx (tmode);
35223
35224 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35225 return target;
35226 }
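/* A user-level sketch of how these init builtins are reached (assuming
   the MMX intrinsics from mmintrin.h and -mmmx): _mm_set_pi32 is built on
   __builtin_ia32_vec_init_v2si, so

     #include <mmintrin.h>

     __m64 make_pair (int hi, int lo)
     {
       return _mm_set_pi32 (hi, lo);
     }

   ends up in ix86_expand_vec_init_builtin with two scalar arguments.  */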
35227
35228 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35229 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35230 had a language-level syntax for referencing vector elements. */
35231
35232 static rtx
35233 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35234 {
35235 machine_mode tmode, mode0;
35236 tree arg0, arg1;
35237 int elt;
35238 rtx op0;
35239
35240 arg0 = CALL_EXPR_ARG (exp, 0);
35241 arg1 = CALL_EXPR_ARG (exp, 1);
35242
35243 op0 = expand_normal (arg0);
35244 elt = get_element_number (TREE_TYPE (arg0), arg1);
35245
35246 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35247 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35248 gcc_assert (VECTOR_MODE_P (mode0));
35249
35250 op0 = force_reg (mode0, op0);
35251
35252 if (optimize || !target || !register_operand (target, tmode))
35253 target = gen_reg_rtx (tmode);
35254
35255 ix86_expand_vector_extract (true, target, op0, elt);
35256
35257 return target;
35258 }
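/* A matching sketch for the extract case (assuming xmmintrin.h and -msse):
   _mm_extract_pi16 is built on __builtin_ia32_vec_ext_v4hi, so

     #include <xmmintrin.h>

     int extract_elem2 (__m64 v)
     {
       return _mm_extract_pi16 (v, 2);
     }

   reaches ix86_expand_vec_ext_builtin with selector 2.  */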
35259
35260 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35261 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35262 a language-level syntax for referencing vector elements. */
35263
35264 static rtx
35265 ix86_expand_vec_set_builtin (tree exp)
35266 {
35267 machine_mode tmode, mode1;
35268 tree arg0, arg1, arg2;
35269 int elt;
35270 rtx op0, op1, target;
35271
35272 arg0 = CALL_EXPR_ARG (exp, 0);
35273 arg1 = CALL_EXPR_ARG (exp, 1);
35274 arg2 = CALL_EXPR_ARG (exp, 2);
35275
35276 tmode = TYPE_MODE (TREE_TYPE (arg0));
35277 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35278 gcc_assert (VECTOR_MODE_P (tmode));
35279
35280 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35281 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35282 elt = get_element_number (TREE_TYPE (arg0), arg2);
35283
35284 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35285 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35286
35287 op0 = force_reg (tmode, op0);
35288 op1 = force_reg (mode1, op1);
35289
35290 /* OP0 is the source of these builtin functions and shouldn't be
35291 modified. Create a copy, use it and return it as target. */
35292 target = gen_reg_rtx (tmode);
35293 emit_move_insn (target, op0);
35294 ix86_expand_vector_set (true, target, op1, elt);
35295
35296 return target;
35297 }
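/* And a sketch for the set case (assuming xmmintrin.h and -msse):
   _mm_insert_pi16 uses __builtin_ia32_vec_set_v4hi, so

     #include <xmmintrin.h>

     __m64 replace_elem1 (__m64 v, int x)
     {
       return _mm_insert_pi16 (v, x, 1);
     }

   copies V, overwrites element 1 and returns the copy, as described in
   the comment above.  */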
35298
35299 /* Emit a conditional move of SRC to DST under the condition
35300 OP1 CODE OP2. */
35301 static void
35302 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
35303 {
35304 rtx t;
35305
35306 if (TARGET_CMOVE)
35307 {
35308 t = ix86_expand_compare (code, op1, op2);
35309 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
35310 src, dst)));
35311 }
35312 else
35313 {
35314 rtx_code_label *nomove = gen_label_rtx ();
35315 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
35316 const0_rtx, GET_MODE (op1), 1, nomove);
35317 emit_move_insn (dst, src);
35318 emit_label (nomove);
35319 }
35320 }
35321
35322 /* Choose the unsigned max of DST and SRC and put it in DST. */
35323 static void
35324 ix86_emit_move_max (rtx dst, rtx src)
35325 {
35326 ix86_emit_cmove (dst, src, LTU, dst, src);
35327 }
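/* For illustration, a rough sketch of what the two helpers above emit for
   ix86_emit_move_max (dst, src) on a TARGET_CMOVE target (AT&T syntax,
   registers made up):

       cmpq   %rsi, %rdi        compare dst with src
       cmovb  %rsi, %rdi        dst = src if dst < src (unsigned)

   and, without CMOV, a compare, a branch over the move, and the move
   itself, leaving the unsigned maximum in DST either way.  */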
35328
35329 /* Expand an expression EXP that calls a built-in function,
35330 with result going to TARGET if that's convenient
35331 (and in mode MODE if that's convenient).
35332 SUBTARGET may be used as the target for computing one of EXP's operands.
35333 IGNORE is nonzero if the value is to be ignored. */
35334
35335 static rtx
35336 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35337 machine_mode mode, int ignore)
35338 {
35339 size_t i;
35340 enum insn_code icode, icode2;
35341 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35342 tree arg0, arg1, arg2, arg3, arg4;
35343 rtx op0, op1, op2, op3, op4, pat, pat2, insn;
35344 machine_mode mode0, mode1, mode2, mode3, mode4;
35345 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35346
35347 /* For CPU builtins that can be folded, fold first and expand the fold. */
35348 switch (fcode)
35349 {
35350 case IX86_BUILTIN_CPU_INIT:
35351 {
35352 /* Make it call __cpu_indicator_init in libgcc. */
35353 tree call_expr, fndecl, type;
35354 type = build_function_type_list (integer_type_node, NULL_TREE);
35355 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35356 call_expr = build_call_expr (fndecl, 0);
35357 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35358 }
35359 case IX86_BUILTIN_CPU_IS:
35360 case IX86_BUILTIN_CPU_SUPPORTS:
35361 {
35362 tree arg0 = CALL_EXPR_ARG (exp, 0);
35363 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35364 gcc_assert (fold_expr != NULL_TREE);
35365 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35366 }
35367 }
35368
35369 /* Determine whether the builtin function is available under the current ISA.
35370 Originally the builtin was not created if it wasn't applicable to the
35371 current ISA based on the command line switches. With function specific
35372 options, we need to check in the context of the function making the call
35373 whether it is supported. Treat AVX512VL and MMX specially. For other flags,
35374 if isa includes more than one ISA bit, treat those as requiring any
35375 of them. For AVX512VL, require both AVX512VL and the non-AVX512VL
35376 ISAs. Likewise for MMX, require both MMX and the non-MMX ISAs.
35377 Similarly for 64BIT, but we shouldn't be building such builtins
35378 at all, -m64 is a whole TU option. */
35379 if (((ix86_builtins_isa[fcode].isa
35380 & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
35381 | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI
35382 | OPTION_MASK_ISA_VPCLMULQDQ))
35383 && !(ix86_builtins_isa[fcode].isa
35384 & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
35385 | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI
35386 | OPTION_MASK_ISA_VPCLMULQDQ)
35387 & ix86_isa_flags))
35388 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL)
35389 && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL))
35390 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_GFNI)
35391 && !(ix86_isa_flags & OPTION_MASK_ISA_GFNI))
35392 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_MMX)
35393 && !(ix86_isa_flags & OPTION_MASK_ISA_MMX))
35394 || (ix86_builtins_isa[fcode].isa2
35395 && !(ix86_builtins_isa[fcode].isa2 & ix86_isa_flags2)))
35396 {
35397 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa,
35398 ix86_builtins_isa[fcode].isa2, 0, 0,
35399 NULL, NULL, (enum fpmath_unit) 0,
35400 false);
35401 if (!opts)
35402 error ("%qE needs unknown isa option", fndecl);
35403 else
35404 {
35405 gcc_assert (opts != NULL);
35406 error ("%qE needs isa option %s", fndecl, opts);
35407 free (opts);
35408 }
35409 return expand_call (exp, target, ignore);
35410 }
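/* A user-level sketch of the availability check above (hypothetical
   function name): an AVX512VL builtin is only expanded when both the
   AVX512VL bit and the base ISA bit are enabled for the calling function,
   e.g. via a target attribute rather than a global -m option:

     #include <immintrin.h>

     __attribute__ ((target ("avx512f,avx512vl")))
     __m256d masked_add (__m256d a, __m256d b, __mmask8 m)
     {
       return _mm256_mask_add_pd (a, m, a, b);
     }

   In a function without those ISA bits the same call is diagnosed with
   "needs isa option ..." and expanded as an ordinary call.  */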
35411
35412 switch (fcode)
35413 {
35414 case IX86_BUILTIN_BNDMK:
35415 if (!target
35416 || GET_MODE (target) != BNDmode
35417 || !register_operand (target, BNDmode))
35418 target = gen_reg_rtx (BNDmode);
35419
35420 arg0 = CALL_EXPR_ARG (exp, 0);
35421 arg1 = CALL_EXPR_ARG (exp, 1);
35422
35423 op0 = expand_normal (arg0);
35424 op1 = expand_normal (arg1);
35425
35426 if (!register_operand (op0, Pmode))
35427 op0 = ix86_zero_extend_to_Pmode (op0);
35428 if (!register_operand (op1, Pmode))
35429 op1 = ix86_zero_extend_to_Pmode (op1);
35430
35431 /* Builtin arg1 is the size of the block, but instruction op1 should
35432 be (size - 1). */
35433 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
35434 NULL_RTX, 1, OPTAB_DIRECT);
35435
35436 emit_insn (BNDmode == BND64mode
35437 ? gen_bnd64_mk (target, op0, op1)
35438 : gen_bnd32_mk (target, op0, op1));
35439 return target;
35440
35441 case IX86_BUILTIN_BNDSTX:
35442 arg0 = CALL_EXPR_ARG (exp, 0);
35443 arg1 = CALL_EXPR_ARG (exp, 1);
35444 arg2 = CALL_EXPR_ARG (exp, 2);
35445
35446 op0 = expand_normal (arg0);
35447 op1 = expand_normal (arg1);
35448 op2 = expand_normal (arg2);
35449
35450 if (!register_operand (op0, Pmode))
35451 op0 = ix86_zero_extend_to_Pmode (op0);
35452 if (!register_operand (op1, BNDmode))
35453 op1 = copy_to_mode_reg (BNDmode, op1);
35454 if (!register_operand (op2, Pmode))
35455 op2 = ix86_zero_extend_to_Pmode (op2);
35456
35457 emit_insn (BNDmode == BND64mode
35458 ? gen_bnd64_stx (op2, op0, op1)
35459 : gen_bnd32_stx (op2, op0, op1));
35460 return 0;
35461
35462 case IX86_BUILTIN_BNDLDX:
35463 if (!target
35464 || GET_MODE (target) != BNDmode
35465 || !register_operand (target, BNDmode))
35466 target = gen_reg_rtx (BNDmode);
35467
35468 arg0 = CALL_EXPR_ARG (exp, 0);
35469 arg1 = CALL_EXPR_ARG (exp, 1);
35470
35471 op0 = expand_normal (arg0);
35472 op1 = expand_normal (arg1);
35473
35474 if (!register_operand (op0, Pmode))
35475 op0 = ix86_zero_extend_to_Pmode (op0);
35476 if (!register_operand (op1, Pmode))
35477 op1 = ix86_zero_extend_to_Pmode (op1);
35478
35479 emit_insn (BNDmode == BND64mode
35480 ? gen_bnd64_ldx (target, op0, op1)
35481 : gen_bnd32_ldx (target, op0, op1));
35482 return target;
35483
35484 case IX86_BUILTIN_BNDCL:
35485 arg0 = CALL_EXPR_ARG (exp, 0);
35486 arg1 = CALL_EXPR_ARG (exp, 1);
35487
35488 op0 = expand_normal (arg0);
35489 op1 = expand_normal (arg1);
35490
35491 if (!register_operand (op0, Pmode))
35492 op0 = ix86_zero_extend_to_Pmode (op0);
35493 if (!register_operand (op1, BNDmode))
35494 op1 = copy_to_mode_reg (BNDmode, op1);
35495
35496 emit_insn (BNDmode == BND64mode
35497 ? gen_bnd64_cl (op1, op0)
35498 : gen_bnd32_cl (op1, op0));
35499 return 0;
35500
35501 case IX86_BUILTIN_BNDCU:
35502 arg0 = CALL_EXPR_ARG (exp, 0);
35503 arg1 = CALL_EXPR_ARG (exp, 1);
35504
35505 op0 = expand_normal (arg0);
35506 op1 = expand_normal (arg1);
35507
35508 if (!register_operand (op0, Pmode))
35509 op0 = ix86_zero_extend_to_Pmode (op0);
35510 if (!register_operand (op1, BNDmode))
35511 op1 = copy_to_mode_reg (BNDmode, op1);
35512
35513 emit_insn (BNDmode == BND64mode
35514 ? gen_bnd64_cu (op1, op0)
35515 : gen_bnd32_cu (op1, op0));
35516 return 0;
35517
35518 case IX86_BUILTIN_BNDRET:
35519 arg0 = CALL_EXPR_ARG (exp, 0);
35520 target = chkp_get_rtl_bounds (arg0);
35521
35522 /* If no bounds were specified for the returned value,
35523 then use INIT bounds. This usually happens when
35524 some built-in function is expanded. */
35525 if (!target)
35526 {
35527 rtx t1 = gen_reg_rtx (Pmode);
35528 rtx t2 = gen_reg_rtx (Pmode);
35529 target = gen_reg_rtx (BNDmode);
35530 emit_move_insn (t1, const0_rtx);
35531 emit_move_insn (t2, constm1_rtx);
35532 emit_insn (BNDmode == BND64mode
35533 ? gen_bnd64_mk (target, t1, t2)
35534 : gen_bnd32_mk (target, t1, t2));
35535 }
35536
35537 gcc_assert (target && REG_P (target));
35538 return target;
35539
35540 case IX86_BUILTIN_BNDNARROW:
35541 {
35542 rtx m1, m1h1, m1h2, lb, ub, t1;
35543
35544 /* Return value and lb. */
35545 arg0 = CALL_EXPR_ARG (exp, 0);
35546 /* Bounds. */
35547 arg1 = CALL_EXPR_ARG (exp, 1);
35548 /* Size. */
35549 arg2 = CALL_EXPR_ARG (exp, 2);
35550
35551 lb = expand_normal (arg0);
35552 op1 = expand_normal (arg1);
35553 op2 = expand_normal (arg2);
35554
35555 /* Size was passed but we need to use (size - 1) as for bndmk. */
35556 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
35557 NULL_RTX, 1, OPTAB_DIRECT);
35558
35559 /* Add LB to the size and invert to get UB. */
35560 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
35561 op2, 1, OPTAB_DIRECT);
35562 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
35563
35564 if (!register_operand (lb, Pmode))
35565 lb = ix86_zero_extend_to_Pmode (lb);
35566 if (!register_operand (ub, Pmode))
35567 ub = ix86_zero_extend_to_Pmode (ub);
35568
35569 /* We need to move bounds to memory before any computations. */
35570 if (MEM_P (op1))
35571 m1 = op1;
35572 else
35573 {
35574 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
35575 emit_move_insn (m1, op1);
35576 }
35577
35578 /* Generate mem expression to be used for access to LB and UB. */
35579 m1h1 = adjust_address (m1, Pmode, 0);
35580 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
35581
35582 t1 = gen_reg_rtx (Pmode);
35583
35584 /* Compute LB. */
35585 emit_move_insn (t1, m1h1);
35586 ix86_emit_move_max (t1, lb);
35587 emit_move_insn (m1h1, t1);
35588
35589 /* Compute UB. UB is stored in 1's complement form. Therefore
35590 we also use max here. */
35591 emit_move_insn (t1, m1h2);
35592 ix86_emit_move_max (t1, ub);
35593 emit_move_insn (m1h2, t1);
35594
35595 op2 = gen_reg_rtx (BNDmode);
35596 emit_move_insn (op2, m1);
35597
35598 return chkp_join_splitted_slot (lb, op2);
35599 }
35600
35601 case IX86_BUILTIN_BNDINT:
35602 {
35603 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
35604
35605 if (!target
35606 || GET_MODE (target) != BNDmode
35607 || !register_operand (target, BNDmode))
35608 target = gen_reg_rtx (BNDmode);
35609
35610 arg0 = CALL_EXPR_ARG (exp, 0);
35611 arg1 = CALL_EXPR_ARG (exp, 1);
35612
35613 op0 = expand_normal (arg0);
35614 op1 = expand_normal (arg1);
35615
35616 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
35617 rh1 = adjust_address (res, Pmode, 0);
35618 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
35619
35620 /* Put the first bounds into temporaries. */
35621 lb1 = gen_reg_rtx (Pmode);
35622 ub1 = gen_reg_rtx (Pmode);
35623 if (MEM_P (op0))
35624 {
35625 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
35626 emit_move_insn (ub1, adjust_address (op0, Pmode,
35627 GET_MODE_SIZE (Pmode)));
35628 }
35629 else
35630 {
35631 emit_move_insn (res, op0);
35632 emit_move_insn (lb1, rh1);
35633 emit_move_insn (ub1, rh2);
35634 }
35635
35636 /* Put the second bounds into temporaries. */
35637 lb2 = gen_reg_rtx (Pmode);
35638 ub2 = gen_reg_rtx (Pmode);
35639 if (MEM_P (op1))
35640 {
35641 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
35642 emit_move_insn (ub2, adjust_address (op1, Pmode,
35643 GET_MODE_SIZE (Pmode)));
35644 }
35645 else
35646 {
35647 emit_move_insn (res, op1);
35648 emit_move_insn (lb2, rh1);
35649 emit_move_insn (ub2, rh2);
35650 }
35651
35652 /* Compute LB. */
35653 ix86_emit_move_max (lb1, lb2);
35654 emit_move_insn (rh1, lb1);
35655
35656 /* Compute UB. UB is stored in 1's complement form. Therefore
35657 we also use max here. */
35658 ix86_emit_move_max (ub1, ub2);
35659 emit_move_insn (rh2, ub1);
35660
35661 emit_move_insn (target, res);
35662
35663 return target;
35664 }
35665
35666 case IX86_BUILTIN_SIZEOF:
35667 {
35668 tree name;
35669 rtx symbol;
35670
35671 if (!target
35672 || GET_MODE (target) != Pmode
35673 || !register_operand (target, Pmode))
35674 target = gen_reg_rtx (Pmode);
35675
35676 arg0 = CALL_EXPR_ARG (exp, 0);
35677 gcc_assert (VAR_P (arg0));
35678
35679 name = DECL_ASSEMBLER_NAME (arg0);
35680 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
35681
35682 emit_insn (Pmode == SImode
35683 ? gen_move_size_reloc_si (target, symbol)
35684 : gen_move_size_reloc_di (target, symbol));
35685
35686 return target;
35687 }
35688
35689 case IX86_BUILTIN_BNDLOWER:
35690 {
35691 rtx mem, hmem;
35692
35693 if (!target
35694 || GET_MODE (target) != Pmode
35695 || !register_operand (target, Pmode))
35696 target = gen_reg_rtx (Pmode);
35697
35698 arg0 = CALL_EXPR_ARG (exp, 0);
35699 op0 = expand_normal (arg0);
35700
35701 /* We need to move bounds to memory first. */
35702 if (MEM_P (op0))
35703 mem = op0;
35704 else
35705 {
35706 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
35707 emit_move_insn (mem, op0);
35708 }
35709
35710 /* Generate mem expression to access LB and load it. */
35711 hmem = adjust_address (mem, Pmode, 0);
35712 emit_move_insn (target, hmem);
35713
35714 return target;
35715 }
35716
35717 case IX86_BUILTIN_BNDUPPER:
35718 {
35719 rtx mem, hmem, res;
35720
35721 if (!target
35722 || GET_MODE (target) != Pmode
35723 || !register_operand (target, Pmode))
35724 target = gen_reg_rtx (Pmode);
35725
35726 arg0 = CALL_EXPR_ARG (exp, 0);
35727 op0 = expand_normal (arg0);
35728
35729 /* We need to move bounds to memory first. */
35730 if (MEM_P (op0))
35731 mem = op0;
35732 else
35733 {
35734 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
35735 emit_move_insn (mem, op0);
35736 }
35737
35738 /* Generate mem expression to access UB. */
35739 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
35740
35741 /* We need to invert all bits of UB. */
35742 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
35743
35744 if (res != target)
35745 emit_move_insn (target, res);
35746
35747 return target;
35748 }
35749
35750 case IX86_BUILTIN_MASKMOVQ:
35751 case IX86_BUILTIN_MASKMOVDQU:
35752 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35753 ? CODE_FOR_mmx_maskmovq
35754 : CODE_FOR_sse2_maskmovdqu);
35755 /* Note the arg order is different from the operand order. */
35756 arg1 = CALL_EXPR_ARG (exp, 0);
35757 arg2 = CALL_EXPR_ARG (exp, 1);
35758 arg0 = CALL_EXPR_ARG (exp, 2);
35759 op0 = expand_normal (arg0);
35760 op1 = expand_normal (arg1);
35761 op2 = expand_normal (arg2);
35762 mode0 = insn_data[icode].operand[0].mode;
35763 mode1 = insn_data[icode].operand[1].mode;
35764 mode2 = insn_data[icode].operand[2].mode;
35765
35766 op0 = ix86_zero_extend_to_Pmode (op0);
35767 op0 = gen_rtx_MEM (mode1, op0);
35768
35769 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35770 op0 = copy_to_mode_reg (mode0, op0);
35771 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35772 op1 = copy_to_mode_reg (mode1, op1);
35773 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35774 op2 = copy_to_mode_reg (mode2, op2);
35775 pat = GEN_FCN (icode) (op0, op1, op2);
35776 if (! pat)
35777 return 0;
35778 emit_insn (pat);
35779 return 0;
35780
35781 case IX86_BUILTIN_LDMXCSR:
35782 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35783 target = assign_386_stack_local (SImode, SLOT_TEMP);
35784 emit_move_insn (target, op0);
35785 emit_insn (gen_sse_ldmxcsr (target));
35786 return 0;
35787
35788 case IX86_BUILTIN_STMXCSR:
35789 target = assign_386_stack_local (SImode, SLOT_TEMP);
35790 emit_insn (gen_sse_stmxcsr (target));
35791 return copy_to_mode_reg (SImode, target);
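/* These two cases back the MXCSR intrinsics from xmmintrin.h; a minimal
   user-level sketch (assuming -msse):

     #include <xmmintrin.h>

     unsigned int save_and_set_ftz (void)
     {
       unsigned int old = _mm_getcsr ();
       _mm_setcsr (old | _MM_FLUSH_ZERO_ON);
       return old;
     }
*/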
35792
35793 case IX86_BUILTIN_CLFLUSH:
35794 arg0 = CALL_EXPR_ARG (exp, 0);
35795 op0 = expand_normal (arg0);
35796 icode = CODE_FOR_sse2_clflush;
35797 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35798 op0 = ix86_zero_extend_to_Pmode (op0);
35799
35800 emit_insn (gen_sse2_clflush (op0));
35801 return 0;
35802
35803 case IX86_BUILTIN_CLWB:
35804 arg0 = CALL_EXPR_ARG (exp, 0);
35805 op0 = expand_normal (arg0);
35806 icode = CODE_FOR_clwb;
35807 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35808 op0 = ix86_zero_extend_to_Pmode (op0);
35809
35810 emit_insn (gen_clwb (op0));
35811 return 0;
35812
35813 case IX86_BUILTIN_CLFLUSHOPT:
35814 arg0 = CALL_EXPR_ARG (exp, 0);
35815 op0 = expand_normal (arg0);
35816 icode = CODE_FOR_clflushopt;
35817 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35818 op0 = ix86_zero_extend_to_Pmode (op0);
35819
35820 emit_insn (gen_clflushopt (op0));
35821 return 0;
35822
35823 case IX86_BUILTIN_MONITOR:
35824 case IX86_BUILTIN_MONITORX:
35825 arg0 = CALL_EXPR_ARG (exp, 0);
35826 arg1 = CALL_EXPR_ARG (exp, 1);
35827 arg2 = CALL_EXPR_ARG (exp, 2);
35828 op0 = expand_normal (arg0);
35829 op1 = expand_normal (arg1);
35830 op2 = expand_normal (arg2);
35831 if (!REG_P (op0))
35832 op0 = ix86_zero_extend_to_Pmode (op0);
35833 if (!REG_P (op1))
35834 op1 = copy_to_mode_reg (SImode, op1);
35835 if (!REG_P (op2))
35836 op2 = copy_to_mode_reg (SImode, op2);
35837
35838 emit_insn (fcode == IX86_BUILTIN_MONITOR
35839 ? ix86_gen_monitor (op0, op1, op2)
35840 : ix86_gen_monitorx (op0, op1, op2));
35841 return 0;
35842
35843 case IX86_BUILTIN_MWAIT:
35844 arg0 = CALL_EXPR_ARG (exp, 0);
35845 arg1 = CALL_EXPR_ARG (exp, 1);
35846 op0 = expand_normal (arg0);
35847 op1 = expand_normal (arg1);
35848 if (!REG_P (op0))
35849 op0 = copy_to_mode_reg (SImode, op0);
35850 if (!REG_P (op1))
35851 op1 = copy_to_mode_reg (SImode, op1);
35852 emit_insn (gen_sse3_mwait (op0, op1));
35853 return 0;
35854
35855 case IX86_BUILTIN_MWAITX:
35856 arg0 = CALL_EXPR_ARG (exp, 0);
35857 arg1 = CALL_EXPR_ARG (exp, 1);
35858 arg2 = CALL_EXPR_ARG (exp, 2);
35859 op0 = expand_normal (arg0);
35860 op1 = expand_normal (arg1);
35861 op2 = expand_normal (arg2);
35862 if (!REG_P (op0))
35863 op0 = copy_to_mode_reg (SImode, op0);
35864 if (!REG_P (op1))
35865 op1 = copy_to_mode_reg (SImode, op1);
35866 if (!REG_P (op2))
35867 op2 = copy_to_mode_reg (SImode, op2);
35868 emit_insn (gen_mwaitx (op0, op1, op2));
35869 return 0;
35870
35871 case IX86_BUILTIN_CLZERO:
35872 arg0 = CALL_EXPR_ARG (exp, 0);
35873 op0 = expand_normal (arg0);
35874 if (!REG_P (op0))
35875 op0 = ix86_zero_extend_to_Pmode (op0);
35876 emit_insn (ix86_gen_clzero (op0));
35877 return 0;
35878
35879 case IX86_BUILTIN_VEC_INIT_V2SI:
35880 case IX86_BUILTIN_VEC_INIT_V4HI:
35881 case IX86_BUILTIN_VEC_INIT_V8QI:
35882 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35883
35884 case IX86_BUILTIN_VEC_EXT_V2DF:
35885 case IX86_BUILTIN_VEC_EXT_V2DI:
35886 case IX86_BUILTIN_VEC_EXT_V4SF:
35887 case IX86_BUILTIN_VEC_EXT_V4SI:
35888 case IX86_BUILTIN_VEC_EXT_V8HI:
35889 case IX86_BUILTIN_VEC_EXT_V2SI:
35890 case IX86_BUILTIN_VEC_EXT_V4HI:
35891 case IX86_BUILTIN_VEC_EXT_V16QI:
35892 return ix86_expand_vec_ext_builtin (exp, target);
35893
35894 case IX86_BUILTIN_VEC_SET_V2DI:
35895 case IX86_BUILTIN_VEC_SET_V4SF:
35896 case IX86_BUILTIN_VEC_SET_V4SI:
35897 case IX86_BUILTIN_VEC_SET_V8HI:
35898 case IX86_BUILTIN_VEC_SET_V4HI:
35899 case IX86_BUILTIN_VEC_SET_V16QI:
35900 return ix86_expand_vec_set_builtin (exp);
35901
35902 case IX86_BUILTIN_NANQ:
35903 case IX86_BUILTIN_NANSQ:
35904 return expand_call (exp, target, ignore);
35905
35906 case IX86_BUILTIN_RDPMC:
35907 case IX86_BUILTIN_RDTSC:
35908 case IX86_BUILTIN_RDTSCP:
35909 case IX86_BUILTIN_XGETBV:
35910
35911 op0 = gen_reg_rtx (DImode);
35912 op1 = gen_reg_rtx (DImode);
35913
35914 if (fcode == IX86_BUILTIN_RDPMC)
35915 {
35916 arg0 = CALL_EXPR_ARG (exp, 0);
35917 op2 = expand_normal (arg0);
35918 if (!register_operand (op2, SImode))
35919 op2 = copy_to_mode_reg (SImode, op2);
35920
35921 insn = (TARGET_64BIT
35922 ? gen_rdpmc_rex64 (op0, op1, op2)
35923 : gen_rdpmc (op0, op2));
35924 emit_insn (insn);
35925 }
35926 else if (fcode == IX86_BUILTIN_XGETBV)
35927 {
35928 arg0 = CALL_EXPR_ARG (exp, 0);
35929 op2 = expand_normal (arg0);
35930 if (!register_operand (op2, SImode))
35931 op2 = copy_to_mode_reg (SImode, op2);
35932
35933 insn = (TARGET_64BIT
35934 ? gen_xgetbv_rex64 (op0, op1, op2)
35935 : gen_xgetbv (op0, op2));
35936 emit_insn (insn);
35937 }
35938 else if (fcode == IX86_BUILTIN_RDTSC)
35939 {
35940 insn = (TARGET_64BIT
35941 ? gen_rdtsc_rex64 (op0, op1)
35942 : gen_rdtsc (op0));
35943 emit_insn (insn);
35944 }
35945 else
35946 {
35947 op2 = gen_reg_rtx (SImode);
35948
35949 insn = (TARGET_64BIT
35950 ? gen_rdtscp_rex64 (op0, op1, op2)
35951 : gen_rdtscp (op0, op2));
35952 emit_insn (insn);
35953
35954 arg0 = CALL_EXPR_ARG (exp, 0);
35955 op4 = expand_normal (arg0);
35956 if (!address_operand (op4, VOIDmode))
35957 {
35958 op4 = convert_memory_address (Pmode, op4);
35959 op4 = copy_addr_to_reg (op4);
35960 }
35961 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35962 }
35963
35964 if (target == 0)
35965 {
35966 /* MODE is VOIDmode if __builtin_rd* has been called
35967 without an lhs. */
35968 if (mode == VOIDmode)
35969 return target;
35970 target = gen_reg_rtx (mode);
35971 }
35972
35973 if (TARGET_64BIT)
35974 {
35975 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35976 op1, 1, OPTAB_DIRECT);
35977 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35978 op0, 1, OPTAB_DIRECT);
35979 }
35980
35981 emit_move_insn (target, op0);
35982 return target;
35983
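/* Illustrative note: these builtins only take a pointer to the save
   area; the 512-byte FXSAVE region (or the FNSTENV environment) is
   modelled as a BLKmode memory operand.  A rough usage sketch, assuming
   the <fxsrintrin.h> wrapper:

     char buf[512] __attribute__ ((aligned (16)));
     _fxsave (buf);    // expands via IX86_BUILTIN_FXSAVE below
  */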
35984 case IX86_BUILTIN_FXSAVE:
35985 case IX86_BUILTIN_FXRSTOR:
35986 case IX86_BUILTIN_FXSAVE64:
35987 case IX86_BUILTIN_FXRSTOR64:
35988 case IX86_BUILTIN_FNSTENV:
35989 case IX86_BUILTIN_FLDENV:
35990 mode0 = BLKmode;
35991 switch (fcode)
35992 {
35993 case IX86_BUILTIN_FXSAVE:
35994 icode = CODE_FOR_fxsave;
35995 break;
35996 case IX86_BUILTIN_FXRSTOR:
35997 icode = CODE_FOR_fxrstor;
35998 break;
35999 case IX86_BUILTIN_FXSAVE64:
36000 icode = CODE_FOR_fxsave64;
36001 break;
36002 case IX86_BUILTIN_FXRSTOR64:
36003 icode = CODE_FOR_fxrstor64;
36004 break;
36005 case IX86_BUILTIN_FNSTENV:
36006 icode = CODE_FOR_fnstenv;
36007 break;
36008 case IX86_BUILTIN_FLDENV:
36009 icode = CODE_FOR_fldenv;
36010 break;
36011 default:
36012 gcc_unreachable ();
36013 }
36014
36015 arg0 = CALL_EXPR_ARG (exp, 0);
36016 op0 = expand_normal (arg0);
36017
36018 if (!address_operand (op0, VOIDmode))
36019 {
36020 op0 = convert_memory_address (Pmode, op0);
36021 op0 = copy_addr_to_reg (op0);
36022 }
36023 op0 = gen_rtx_MEM (mode0, op0);
36024
36025 pat = GEN_FCN (icode) (op0);
36026 if (pat)
36027 emit_insn (pat);
36028 return 0;
36029
36030 case IX86_BUILTIN_XSETBV:
36031 arg0 = CALL_EXPR_ARG (exp, 0);
36032 arg1 = CALL_EXPR_ARG (exp, 1);
36033 op0 = expand_normal (arg0);
36034 op1 = expand_normal (arg1);
36035
36036 if (!REG_P (op0))
36037 op0 = copy_to_mode_reg (SImode, op0);
36038
36039 if (TARGET_64BIT)
36040 {
36041 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
36042 NULL, 1, OPTAB_DIRECT);
36043
36044 op2 = gen_lowpart (SImode, op2);
36045 op1 = gen_lowpart (SImode, op1);
36046 if (!REG_P (op1))
36047 op1 = copy_to_mode_reg (SImode, op1);
36048 if (!REG_P (op2))
36049 op2 = copy_to_mode_reg (SImode, op2);
36050 icode = CODE_FOR_xsetbv_rex64;
36051 pat = GEN_FCN (icode) (op0, op1, op2);
36052 }
36053 else
36054 {
36055 if (!REG_P (op1))
36056 op1 = copy_to_mode_reg (DImode, op1);
36057 icode = CODE_FOR_xsetbv;
36058 pat = GEN_FCN (icode) (op0, op1);
36059 }
36060 if (pat)
36061 emit_insn (pat);
36062 return 0;
36063
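/* Illustrative note: each of these takes a pointer to the XSAVE area
   plus a 64-bit feature mask.  A rough usage sketch, assuming the
   <xsaveintrin.h> wrapper:

     _xsave (buf, mask);   // mask selects the state components

   The mask is forced into a DImode register; on 64-bit targets it is
   then split into two SImode halves (EDX:EAX) with a logical shift
   right by 32, matching what the instruction expects.  */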
36064 case IX86_BUILTIN_XSAVE:
36065 case IX86_BUILTIN_XRSTOR:
36066 case IX86_BUILTIN_XSAVE64:
36067 case IX86_BUILTIN_XRSTOR64:
36068 case IX86_BUILTIN_XSAVEOPT:
36069 case IX86_BUILTIN_XSAVEOPT64:
36070 case IX86_BUILTIN_XSAVES:
36071 case IX86_BUILTIN_XRSTORS:
36072 case IX86_BUILTIN_XSAVES64:
36073 case IX86_BUILTIN_XRSTORS64:
36074 case IX86_BUILTIN_XSAVEC:
36075 case IX86_BUILTIN_XSAVEC64:
36076 arg0 = CALL_EXPR_ARG (exp, 0);
36077 arg1 = CALL_EXPR_ARG (exp, 1);
36078 op0 = expand_normal (arg0);
36079 op1 = expand_normal (arg1);
36080
36081 if (!address_operand (op0, VOIDmode))
36082 {
36083 op0 = convert_memory_address (Pmode, op0);
36084 op0 = copy_addr_to_reg (op0);
36085 }
36086 op0 = gen_rtx_MEM (BLKmode, op0);
36087
36088 op1 = force_reg (DImode, op1);
36089
36090 if (TARGET_64BIT)
36091 {
36092 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
36093 NULL, 1, OPTAB_DIRECT);
36094 switch (fcode)
36095 {
36096 case IX86_BUILTIN_XSAVE:
36097 icode = CODE_FOR_xsave_rex64;
36098 break;
36099 case IX86_BUILTIN_XRSTOR:
36100 icode = CODE_FOR_xrstor_rex64;
36101 break;
36102 case IX86_BUILTIN_XSAVE64:
36103 icode = CODE_FOR_xsave64;
36104 break;
36105 case IX86_BUILTIN_XRSTOR64:
36106 icode = CODE_FOR_xrstor64;
36107 break;
36108 case IX86_BUILTIN_XSAVEOPT:
36109 icode = CODE_FOR_xsaveopt_rex64;
36110 break;
36111 case IX86_BUILTIN_XSAVEOPT64:
36112 icode = CODE_FOR_xsaveopt64;
36113 break;
36114 case IX86_BUILTIN_XSAVES:
36115 icode = CODE_FOR_xsaves_rex64;
36116 break;
36117 case IX86_BUILTIN_XRSTORS:
36118 icode = CODE_FOR_xrstors_rex64;
36119 break;
36120 case IX86_BUILTIN_XSAVES64:
36121 icode = CODE_FOR_xsaves64;
36122 break;
36123 case IX86_BUILTIN_XRSTORS64:
36124 icode = CODE_FOR_xrstors64;
36125 break;
36126 case IX86_BUILTIN_XSAVEC:
36127 icode = CODE_FOR_xsavec_rex64;
36128 break;
36129 case IX86_BUILTIN_XSAVEC64:
36130 icode = CODE_FOR_xsavec64;
36131 break;
36132 default:
36133 gcc_unreachable ();
36134 }
36135
36136 op2 = gen_lowpart (SImode, op2);
36137 op1 = gen_lowpart (SImode, op1);
36138 pat = GEN_FCN (icode) (op0, op1, op2);
36139 }
36140 else
36141 {
36142 switch (fcode)
36143 {
36144 case IX86_BUILTIN_XSAVE:
36145 icode = CODE_FOR_xsave;
36146 break;
36147 case IX86_BUILTIN_XRSTOR:
36148 icode = CODE_FOR_xrstor;
36149 break;
36150 case IX86_BUILTIN_XSAVEOPT:
36151 icode = CODE_FOR_xsaveopt;
36152 break;
36153 case IX86_BUILTIN_XSAVES:
36154 icode = CODE_FOR_xsaves;
36155 break;
36156 case IX86_BUILTIN_XRSTORS:
36157 icode = CODE_FOR_xrstors;
36158 break;
36159 case IX86_BUILTIN_XSAVEC:
36160 icode = CODE_FOR_xsavec;
36161 break;
36162 default:
36163 gcc_unreachable ();
36164 }
36165 pat = GEN_FCN (icode) (op0, op1);
36166 }
36167
36168 if (pat)
36169 emit_insn (pat);
36170 return 0;
36171
36172 case IX86_BUILTIN_LLWPCB:
36173 arg0 = CALL_EXPR_ARG (exp, 0);
36174 op0 = expand_normal (arg0);
36175 icode = CODE_FOR_lwp_llwpcb;
36176 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36177 op0 = ix86_zero_extend_to_Pmode (op0);
36178 emit_insn (gen_lwp_llwpcb (op0));
36179 return 0;
36180
36181 case IX86_BUILTIN_SLWPCB:
36182 icode = CODE_FOR_lwp_slwpcb;
36183 if (!target
36184 || !insn_data[icode].operand[0].predicate (target, Pmode))
36185 target = gen_reg_rtx (Pmode);
36186 emit_insn (gen_lwp_slwpcb (target));
36187 return target;
36188
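/* Illustrative note: the TBM BEXTRI immediate packs the bit-field start
   position in bits 0-7 and its length in bits 8-15, so the expander
   below splits the constant back into those two pieces.  A rough usage
   sketch, assuming the <tbmintrin.h> wrapper:

     unsigned int bits = __bextri_u32 (x, (len << 8) | start);
  */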
36189 case IX86_BUILTIN_BEXTRI32:
36190 case IX86_BUILTIN_BEXTRI64:
36191 arg0 = CALL_EXPR_ARG (exp, 0);
36192 arg1 = CALL_EXPR_ARG (exp, 1);
36193 op0 = expand_normal (arg0);
36194 op1 = expand_normal (arg1);
36195 icode = (fcode == IX86_BUILTIN_BEXTRI32
36196 ? CODE_FOR_tbm_bextri_si
36197 : CODE_FOR_tbm_bextri_di);
36198 if (!CONST_INT_P (op1))
36199 {
36200 error ("last argument must be an immediate");
36201 return const0_rtx;
36202 }
36203 else
36204 {
36205 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
36206 unsigned char lsb_index = INTVAL (op1) & 0xFF;
36207 op1 = GEN_INT (length);
36208 op2 = GEN_INT (lsb_index);
36209 pat = GEN_FCN (icode) (target, op0, op1, op2);
36210 if (pat)
36211 emit_insn (pat);
36212 return target;
36213 }
36214
36215 case IX86_BUILTIN_RDRAND16_STEP:
36216 icode = CODE_FOR_rdrandhi_1;
36217 mode0 = HImode;
36218 goto rdrand_step;
36219
36220 case IX86_BUILTIN_RDRAND32_STEP:
36221 icode = CODE_FOR_rdrandsi_1;
36222 mode0 = SImode;
36223 goto rdrand_step;
36224
36225 case IX86_BUILTIN_RDRAND64_STEP:
36226 icode = CODE_FOR_rdranddi_1;
36227 mode0 = DImode;
36228
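/* Illustrative usage sketch (not part of the expander), assuming the
   <immintrin.h> wrapper:

     unsigned int r;
     if (_rdrand32_step (&r))   // 1 if a random value was delivered
       use (r);

   The expansion stores the value through the pointer argument and then
   builds the 0/1 return value with a conditional move: on success it
   selects the constant 1, and on failure it selects the destination
   register, which the hardware has already zeroed.  */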
36229 rdrand_step:
36230 arg0 = CALL_EXPR_ARG (exp, 0);
36231 op1 = expand_normal (arg0);
36232 if (!address_operand (op1, VOIDmode))
36233 {
36234 op1 = convert_memory_address (Pmode, op1);
36235 op1 = copy_addr_to_reg (op1);
36236 }
36237
36238 op0 = gen_reg_rtx (mode0);
36239 emit_insn (GEN_FCN (icode) (op0));
36240
36241 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
36242
36243 op1 = gen_reg_rtx (SImode);
36244 emit_move_insn (op1, CONST1_RTX (SImode));
36245
36246 /* Emit SImode conditional move. */
36247 if (mode0 == HImode)
36248 {
36249 if (TARGET_ZERO_EXTEND_WITH_AND
36250 && optimize_function_for_speed_p (cfun))
36251 {
36252 op2 = force_reg (SImode, const0_rtx);
36253
36254 emit_insn (gen_movstricthi
36255 (gen_lowpart (HImode, op2), op0));
36256 }
36257 else
36258 {
36259 op2 = gen_reg_rtx (SImode);
36260
36261 emit_insn (gen_zero_extendhisi2 (op2, op0));
36262 }
36263 }
36264 else if (mode0 == SImode)
36265 op2 = op0;
36266 else
36267 op2 = gen_rtx_SUBREG (SImode, op0, 0);
36268
36269 if (target == 0
36270 || !register_operand (target, SImode))
36271 target = gen_reg_rtx (SImode);
36272
36273 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
36274 const0_rtx);
36275 emit_insn (gen_rtx_SET (target,
36276 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
36277 return target;
36278
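/* The RDSEED step builtins follow the same shape as RDRAND above,
   e.g. _rdseed32_step (&r), but the status is produced more directly:
   the carry flag is captured into a QImode register with a SETC-style
   pattern and then zero-extended to the SImode return value.  */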
36279 case IX86_BUILTIN_RDSEED16_STEP:
36280 icode = CODE_FOR_rdseedhi_1;
36281 mode0 = HImode;
36282 goto rdseed_step;
36283
36284 case IX86_BUILTIN_RDSEED32_STEP:
36285 icode = CODE_FOR_rdseedsi_1;
36286 mode0 = SImode;
36287 goto rdseed_step;
36288
36289 case IX86_BUILTIN_RDSEED64_STEP:
36290 icode = CODE_FOR_rdseeddi_1;
36291 mode0 = DImode;
36292
36293 rdseed_step:
36294 arg0 = CALL_EXPR_ARG (exp, 0);
36295 op1 = expand_normal (arg0);
36296 if (!address_operand (op1, VOIDmode))
36297 {
36298 op1 = convert_memory_address (Pmode, op1);
36299 op1 = copy_addr_to_reg (op1);
36300 }
36301
36302 op0 = gen_reg_rtx (mode0);
36303 emit_insn (GEN_FCN (icode) (op0));
36304
36305 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
36306
36307 op2 = gen_reg_rtx (QImode);
36308
36309 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
36310 const0_rtx);
36311 emit_insn (gen_rtx_SET (op2, pat));
36312
36313 if (target == 0
36314 || !register_operand (target, SImode))
36315 target = gen_reg_rtx (SImode);
36316
36317 emit_insn (gen_zero_extendqisi2 (target, op2));
36318 return target;
36319
36320 case IX86_BUILTIN_SBB32:
36321 icode = CODE_FOR_subborrowsi;
36322 icode2 = CODE_FOR_subborrowsi_0;
36323 mode0 = SImode;
36324 mode1 = DImode;
36325 mode2 = CCmode;
36326 goto handlecarry;
36327
36328 case IX86_BUILTIN_SBB64:
36329 icode = CODE_FOR_subborrowdi;
36330 icode2 = CODE_FOR_subborrowdi_0;
36331 mode0 = DImode;
36332 mode1 = TImode;
36333 mode2 = CCmode;
36334 goto handlecarry;
36335
36336 case IX86_BUILTIN_ADDCARRYX32:
36337 icode = CODE_FOR_addcarrysi;
36338 icode2 = CODE_FOR_addcarrysi_0;
36339 mode0 = SImode;
36340 mode1 = DImode;
36341 mode2 = CCCmode;
36342 goto handlecarry;
36343
36344 case IX86_BUILTIN_ADDCARRYX64:
36345 icode = CODE_FOR_addcarrydi;
36346 icode2 = CODE_FOR_addcarrydi_0;
36347 mode0 = DImode;
36348 mode1 = TImode;
36349 mode2 = CCCmode;
36350
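/* Illustrative usage sketch (not part of the expander), assuming the
   <adxintrin.h>-style wrappers:

     unsigned int sum;
     unsigned char c = _addcarry_u32 (c_in, a, b, &sum);

   The expansion first regenerates CF from the incoming carry byte
   (unless it is a literal zero, in which case a plain flag-setting
   add/sub is used), then emits the flag-consuming add-with-carry or
   subtract-with-borrow, stores the wide result through the pointer,
   and returns the resulting carry/borrow flag as the value.  */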
36351 handlecarry:
36352 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
36353 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
36354 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
36355 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
36356
36357 op1 = expand_normal (arg0);
36358 if (!integer_zerop (arg0))
36359 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
36360
36361 op2 = expand_normal (arg1);
36362 if (!register_operand (op2, mode0))
36363 op2 = copy_to_mode_reg (mode0, op2);
36364
36365 op3 = expand_normal (arg2);
36366 if (!register_operand (op3, mode0))
36367 op3 = copy_to_mode_reg (mode0, op3);
36368
36369 op4 = expand_normal (arg3);
36370 if (!address_operand (op4, VOIDmode))
36371 {
36372 op4 = convert_memory_address (Pmode, op4);
36373 op4 = copy_addr_to_reg (op4);
36374 }
36375
36376 op0 = gen_reg_rtx (mode0);
36377 if (integer_zerop (arg0))
36378 {
36379 /* If arg0 is 0, optimize right away into add or sub
36380 instruction that sets CCCmode flags. */
36381 op1 = gen_rtx_REG (mode2, FLAGS_REG);
36382 emit_insn (GEN_FCN (icode2) (op0, op2, op3));
36383 }
36384 else
36385 {
36386 /* Generate CF from input operand. */
36387 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
36388
36389 /* Generate instruction that consumes CF. */
36390 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
36391 pat = gen_rtx_LTU (mode1, op1, const0_rtx);
36392 pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
36393 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
36394 }
36395
36396 /* Return current CF value. */
36397 if (target == 0)
36398 target = gen_reg_rtx (QImode);
36399
36400 pat = gen_rtx_LTU (QImode, op1, const0_rtx);
36401 emit_insn (gen_rtx_SET (target, pat));
36402
36403 /* Store the result. */
36404 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
36405
36406 return target;
36407
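/* Illustrative note: these implement __readeflags () and
   __writeeflags () from ia32intrin.h by going through the stack, e.g.

     unsigned long long f = __readeflags ();   // pushf; pop
     __writeeflags (f);                        // push; popf

   so no dedicated flag-move patterns are needed.  */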
36408 case IX86_BUILTIN_READ_FLAGS:
36409 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
36410
36411 if (optimize
36412 || target == NULL_RTX
36413 || !nonimmediate_operand (target, word_mode)
36414 || GET_MODE (target) != word_mode)
36415 target = gen_reg_rtx (word_mode);
36416
36417 emit_insn (gen_pop (target));
36418 return target;
36419
36420 case IX86_BUILTIN_WRITE_FLAGS:
36421
36422 arg0 = CALL_EXPR_ARG (exp, 0);
36423 op0 = expand_normal (arg0);
36424 if (!general_no_elim_operand (op0, word_mode))
36425 op0 = copy_to_mode_reg (word_mode, op0);
36426
36427 emit_insn (gen_push (op0));
36428 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
36429 return 0;
36430
36431 case IX86_BUILTIN_KTESTC8:
36432 icode = CODE_FOR_ktestqi;
36433 mode3 = CCCmode;
36434 goto kortest;
36435
36436 case IX86_BUILTIN_KTESTZ8:
36437 icode = CODE_FOR_ktestqi;
36438 mode3 = CCZmode;
36439 goto kortest;
36440
36441 case IX86_BUILTIN_KTESTC16:
36442 icode = CODE_FOR_ktesthi;
36443 mode3 = CCCmode;
36444 goto kortest;
36445
36446 case IX86_BUILTIN_KTESTZ16:
36447 icode = CODE_FOR_ktesthi;
36448 mode3 = CCZmode;
36449 goto kortest;
36450
36451 case IX86_BUILTIN_KTESTC32:
36452 icode = CODE_FOR_ktestsi;
36453 mode3 = CCCmode;
36454 goto kortest;
36455
36456 case IX86_BUILTIN_KTESTZ32:
36457 icode = CODE_FOR_ktestsi;
36458 mode3 = CCZmode;
36459 goto kortest;
36460
36461 case IX86_BUILTIN_KTESTC64:
36462 icode = CODE_FOR_ktestdi;
36463 mode3 = CCCmode;
36464 goto kortest;
36465
36466 case IX86_BUILTIN_KTESTZ64:
36467 icode = CODE_FOR_ktestdi;
36468 mode3 = CCZmode;
36469 goto kortest;
36470
36471 case IX86_BUILTIN_KORTESTC8:
36472 icode = CODE_FOR_kortestqi;
36473 mode3 = CCCmode;
36474 goto kortest;
36475
36476 case IX86_BUILTIN_KORTESTZ8:
36477 icode = CODE_FOR_kortestqi;
36478 mode3 = CCZmode;
36479 goto kortest;
36480
36481 case IX86_BUILTIN_KORTESTC16:
36482 icode = CODE_FOR_kortesthi;
36483 mode3 = CCCmode;
36484 goto kortest;
36485
36486 case IX86_BUILTIN_KORTESTZ16:
36487 icode = CODE_FOR_kortesthi;
36488 mode3 = CCZmode;
36489 goto kortest;
36490
36491 case IX86_BUILTIN_KORTESTC32:
36492 icode = CODE_FOR_kortestsi;
36493 mode3 = CCCmode;
36494 goto kortest;
36495
36496 case IX86_BUILTIN_KORTESTZ32:
36497 icode = CODE_FOR_kortestsi;
36498 mode3 = CCZmode;
36499 goto kortest;
36500
36501 case IX86_BUILTIN_KORTESTC64:
36502 icode = CODE_FOR_kortestdi;
36503 mode3 = CCCmode;
36504 goto kortest;
36505
36506 case IX86_BUILTIN_KORTESTZ64:
36507 icode = CODE_FOR_kortestdi;
36508 mode3 = CCZmode;
36509
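/* Illustrative note: every case above funnels into this one expansion.
   MODE3 picks the flag the builtin reports: CCCmode for the ..C
   variants (carry flag) and CCZmode for the ..Z variants (zero flag);
   the ktest/kortest instruction is emitted and the chosen flag is read
   back into a QImode value with setcc.  A rough usage sketch, assuming
   the <avx512fintrin.h> wrapper:

     if (_mm512_kortestz (m1, m2))   // nonzero iff (m1 | m2) == 0
       handle_empty_mask ();
  */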
36510 kortest:
36511 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
36512 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
36513 op0 = expand_normal (arg0);
36514 op1 = expand_normal (arg1);
36515
36516 mode0 = insn_data[icode].operand[0].mode;
36517 mode1 = insn_data[icode].operand[1].mode;
36518
36519 if (GET_MODE (op0) != VOIDmode)
36520 op0 = force_reg (GET_MODE (op0), op0);
36521
36522 op0 = gen_lowpart (mode0, op0);
36523
36524 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36525 op0 = copy_to_mode_reg (mode0, op0);
36526
36527 if (GET_MODE (op1) != VOIDmode)
36528 op1 = force_reg (GET_MODE (op1), op1);
36529
36530 op1 = gen_lowpart (mode1, op1);
36531
36532 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36533 op1 = copy_to_mode_reg (mode1, op1);
36534
36535 target = gen_reg_rtx (QImode);
36536
36537 /* Emit kortest. */
36538 emit_insn (GEN_FCN (icode) (op0, op1));
36539 /* And use setcc to return result from flags. */
36540 ix86_expand_setcc (target, EQ,
36541 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
36542 return target;
36543
36544 case IX86_BUILTIN_GATHERSIV2DF:
36545 icode = CODE_FOR_avx2_gathersiv2df;
36546 goto gather_gen;
36547 case IX86_BUILTIN_GATHERSIV4DF:
36548 icode = CODE_FOR_avx2_gathersiv4df;
36549 goto gather_gen;
36550 case IX86_BUILTIN_GATHERDIV2DF:
36551 icode = CODE_FOR_avx2_gatherdiv2df;
36552 goto gather_gen;
36553 case IX86_BUILTIN_GATHERDIV4DF:
36554 icode = CODE_FOR_avx2_gatherdiv4df;
36555 goto gather_gen;
36556 case IX86_BUILTIN_GATHERSIV4SF:
36557 icode = CODE_FOR_avx2_gathersiv4sf;
36558 goto gather_gen;
36559 case IX86_BUILTIN_GATHERSIV8SF:
36560 icode = CODE_FOR_avx2_gathersiv8sf;
36561 goto gather_gen;
36562 case IX86_BUILTIN_GATHERDIV4SF:
36563 icode = CODE_FOR_avx2_gatherdiv4sf;
36564 goto gather_gen;
36565 case IX86_BUILTIN_GATHERDIV8SF:
36566 icode = CODE_FOR_avx2_gatherdiv8sf;
36567 goto gather_gen;
36568 case IX86_BUILTIN_GATHERSIV2DI:
36569 icode = CODE_FOR_avx2_gathersiv2di;
36570 goto gather_gen;
36571 case IX86_BUILTIN_GATHERSIV4DI:
36572 icode = CODE_FOR_avx2_gathersiv4di;
36573 goto gather_gen;
36574 case IX86_BUILTIN_GATHERDIV2DI:
36575 icode = CODE_FOR_avx2_gatherdiv2di;
36576 goto gather_gen;
36577 case IX86_BUILTIN_GATHERDIV4DI:
36578 icode = CODE_FOR_avx2_gatherdiv4di;
36579 goto gather_gen;
36580 case IX86_BUILTIN_GATHERSIV4SI:
36581 icode = CODE_FOR_avx2_gathersiv4si;
36582 goto gather_gen;
36583 case IX86_BUILTIN_GATHERSIV8SI:
36584 icode = CODE_FOR_avx2_gathersiv8si;
36585 goto gather_gen;
36586 case IX86_BUILTIN_GATHERDIV4SI:
36587 icode = CODE_FOR_avx2_gatherdiv4si;
36588 goto gather_gen;
36589 case IX86_BUILTIN_GATHERDIV8SI:
36590 icode = CODE_FOR_avx2_gatherdiv8si;
36591 goto gather_gen;
36592 case IX86_BUILTIN_GATHERALTSIV4DF:
36593 icode = CODE_FOR_avx2_gathersiv4df;
36594 goto gather_gen;
36595 case IX86_BUILTIN_GATHERALTDIV8SF:
36596 icode = CODE_FOR_avx2_gatherdiv8sf;
36597 goto gather_gen;
36598 case IX86_BUILTIN_GATHERALTSIV4DI:
36599 icode = CODE_FOR_avx2_gathersiv4di;
36600 goto gather_gen;
36601 case IX86_BUILTIN_GATHERALTDIV8SI:
36602 icode = CODE_FOR_avx2_gatherdiv8si;
36603 goto gather_gen;
36604 case IX86_BUILTIN_GATHER3SIV16SF:
36605 icode = CODE_FOR_avx512f_gathersiv16sf;
36606 goto gather_gen;
36607 case IX86_BUILTIN_GATHER3SIV8DF:
36608 icode = CODE_FOR_avx512f_gathersiv8df;
36609 goto gather_gen;
36610 case IX86_BUILTIN_GATHER3DIV16SF:
36611 icode = CODE_FOR_avx512f_gatherdiv16sf;
36612 goto gather_gen;
36613 case IX86_BUILTIN_GATHER3DIV8DF:
36614 icode = CODE_FOR_avx512f_gatherdiv8df;
36615 goto gather_gen;
36616 case IX86_BUILTIN_GATHER3SIV16SI:
36617 icode = CODE_FOR_avx512f_gathersiv16si;
36618 goto gather_gen;
36619 case IX86_BUILTIN_GATHER3SIV8DI:
36620 icode = CODE_FOR_avx512f_gathersiv8di;
36621 goto gather_gen;
36622 case IX86_BUILTIN_GATHER3DIV16SI:
36623 icode = CODE_FOR_avx512f_gatherdiv16si;
36624 goto gather_gen;
36625 case IX86_BUILTIN_GATHER3DIV8DI:
36626 icode = CODE_FOR_avx512f_gatherdiv8di;
36627 goto gather_gen;
36628 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36629 icode = CODE_FOR_avx512f_gathersiv8df;
36630 goto gather_gen;
36631 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36632 icode = CODE_FOR_avx512f_gatherdiv16sf;
36633 goto gather_gen;
36634 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36635 icode = CODE_FOR_avx512f_gathersiv8di;
36636 goto gather_gen;
36637 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36638 icode = CODE_FOR_avx512f_gatherdiv16si;
36639 goto gather_gen;
36640 case IX86_BUILTIN_GATHER3SIV2DF:
36641 icode = CODE_FOR_avx512vl_gathersiv2df;
36642 goto gather_gen;
36643 case IX86_BUILTIN_GATHER3SIV4DF:
36644 icode = CODE_FOR_avx512vl_gathersiv4df;
36645 goto gather_gen;
36646 case IX86_BUILTIN_GATHER3DIV2DF:
36647 icode = CODE_FOR_avx512vl_gatherdiv2df;
36648 goto gather_gen;
36649 case IX86_BUILTIN_GATHER3DIV4DF:
36650 icode = CODE_FOR_avx512vl_gatherdiv4df;
36651 goto gather_gen;
36652 case IX86_BUILTIN_GATHER3SIV4SF:
36653 icode = CODE_FOR_avx512vl_gathersiv4sf;
36654 goto gather_gen;
36655 case IX86_BUILTIN_GATHER3SIV8SF:
36656 icode = CODE_FOR_avx512vl_gathersiv8sf;
36657 goto gather_gen;
36658 case IX86_BUILTIN_GATHER3DIV4SF:
36659 icode = CODE_FOR_avx512vl_gatherdiv4sf;
36660 goto gather_gen;
36661 case IX86_BUILTIN_GATHER3DIV8SF:
36662 icode = CODE_FOR_avx512vl_gatherdiv8sf;
36663 goto gather_gen;
36664 case IX86_BUILTIN_GATHER3SIV2DI:
36665 icode = CODE_FOR_avx512vl_gathersiv2di;
36666 goto gather_gen;
36667 case IX86_BUILTIN_GATHER3SIV4DI:
36668 icode = CODE_FOR_avx512vl_gathersiv4di;
36669 goto gather_gen;
36670 case IX86_BUILTIN_GATHER3DIV2DI:
36671 icode = CODE_FOR_avx512vl_gatherdiv2di;
36672 goto gather_gen;
36673 case IX86_BUILTIN_GATHER3DIV4DI:
36674 icode = CODE_FOR_avx512vl_gatherdiv4di;
36675 goto gather_gen;
36676 case IX86_BUILTIN_GATHER3SIV4SI:
36677 icode = CODE_FOR_avx512vl_gathersiv4si;
36678 goto gather_gen;
36679 case IX86_BUILTIN_GATHER3SIV8SI:
36680 icode = CODE_FOR_avx512vl_gathersiv8si;
36681 goto gather_gen;
36682 case IX86_BUILTIN_GATHER3DIV4SI:
36683 icode = CODE_FOR_avx512vl_gatherdiv4si;
36684 goto gather_gen;
36685 case IX86_BUILTIN_GATHER3DIV8SI:
36686 icode = CODE_FOR_avx512vl_gatherdiv8si;
36687 goto gather_gen;
36688 case IX86_BUILTIN_GATHER3ALTSIV4DF:
36689 icode = CODE_FOR_avx512vl_gathersiv4df;
36690 goto gather_gen;
36691 case IX86_BUILTIN_GATHER3ALTDIV8SF:
36692 icode = CODE_FOR_avx512vl_gatherdiv8sf;
36693 goto gather_gen;
36694 case IX86_BUILTIN_GATHER3ALTSIV4DI:
36695 icode = CODE_FOR_avx512vl_gathersiv4di;
36696 goto gather_gen;
36697 case IX86_BUILTIN_GATHER3ALTDIV8SI:
36698 icode = CODE_FOR_avx512vl_gatherdiv8si;
36699 goto gather_gen;
36700 case IX86_BUILTIN_SCATTERSIV16SF:
36701 icode = CODE_FOR_avx512f_scattersiv16sf;
36702 goto scatter_gen;
36703 case IX86_BUILTIN_SCATTERSIV8DF:
36704 icode = CODE_FOR_avx512f_scattersiv8df;
36705 goto scatter_gen;
36706 case IX86_BUILTIN_SCATTERDIV16SF:
36707 icode = CODE_FOR_avx512f_scatterdiv16sf;
36708 goto scatter_gen;
36709 case IX86_BUILTIN_SCATTERDIV8DF:
36710 icode = CODE_FOR_avx512f_scatterdiv8df;
36711 goto scatter_gen;
36712 case IX86_BUILTIN_SCATTERSIV16SI:
36713 icode = CODE_FOR_avx512f_scattersiv16si;
36714 goto scatter_gen;
36715 case IX86_BUILTIN_SCATTERSIV8DI:
36716 icode = CODE_FOR_avx512f_scattersiv8di;
36717 goto scatter_gen;
36718 case IX86_BUILTIN_SCATTERDIV16SI:
36719 icode = CODE_FOR_avx512f_scatterdiv16si;
36720 goto scatter_gen;
36721 case IX86_BUILTIN_SCATTERDIV8DI:
36722 icode = CODE_FOR_avx512f_scatterdiv8di;
36723 goto scatter_gen;
36724 case IX86_BUILTIN_SCATTERSIV8SF:
36725 icode = CODE_FOR_avx512vl_scattersiv8sf;
36726 goto scatter_gen;
36727 case IX86_BUILTIN_SCATTERSIV4SF:
36728 icode = CODE_FOR_avx512vl_scattersiv4sf;
36729 goto scatter_gen;
36730 case IX86_BUILTIN_SCATTERSIV4DF:
36731 icode = CODE_FOR_avx512vl_scattersiv4df;
36732 goto scatter_gen;
36733 case IX86_BUILTIN_SCATTERSIV2DF:
36734 icode = CODE_FOR_avx512vl_scattersiv2df;
36735 goto scatter_gen;
36736 case IX86_BUILTIN_SCATTERDIV8SF:
36737 icode = CODE_FOR_avx512vl_scatterdiv8sf;
36738 goto scatter_gen;
36739 case IX86_BUILTIN_SCATTERDIV4SF:
36740 icode = CODE_FOR_avx512vl_scatterdiv4sf;
36741 goto scatter_gen;
36742 case IX86_BUILTIN_SCATTERDIV4DF:
36743 icode = CODE_FOR_avx512vl_scatterdiv4df;
36744 goto scatter_gen;
36745 case IX86_BUILTIN_SCATTERDIV2DF:
36746 icode = CODE_FOR_avx512vl_scatterdiv2df;
36747 goto scatter_gen;
36748 case IX86_BUILTIN_SCATTERSIV8SI:
36749 icode = CODE_FOR_avx512vl_scattersiv8si;
36750 goto scatter_gen;
36751 case IX86_BUILTIN_SCATTERSIV4SI:
36752 icode = CODE_FOR_avx512vl_scattersiv4si;
36753 goto scatter_gen;
36754 case IX86_BUILTIN_SCATTERSIV4DI:
36755 icode = CODE_FOR_avx512vl_scattersiv4di;
36756 goto scatter_gen;
36757 case IX86_BUILTIN_SCATTERSIV2DI:
36758 icode = CODE_FOR_avx512vl_scattersiv2di;
36759 goto scatter_gen;
36760 case IX86_BUILTIN_SCATTERDIV8SI:
36761 icode = CODE_FOR_avx512vl_scatterdiv8si;
36762 goto scatter_gen;
36763 case IX86_BUILTIN_SCATTERDIV4SI:
36764 icode = CODE_FOR_avx512vl_scatterdiv4si;
36765 goto scatter_gen;
36766 case IX86_BUILTIN_SCATTERDIV4DI:
36767 icode = CODE_FOR_avx512vl_scatterdiv4di;
36768 goto scatter_gen;
36769 case IX86_BUILTIN_SCATTERDIV2DI:
36770 icode = CODE_FOR_avx512vl_scatterdiv2di;
36771 goto scatter_gen;
36772 case IX86_BUILTIN_GATHERPFDPD:
36773 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36774 goto vec_prefetch_gen;
36775 case IX86_BUILTIN_SCATTERALTSIV8DF:
36776 icode = CODE_FOR_avx512f_scattersiv8df;
36777 goto scatter_gen;
36778 case IX86_BUILTIN_SCATTERALTDIV16SF:
36779 icode = CODE_FOR_avx512f_scatterdiv16sf;
36780 goto scatter_gen;
36781 case IX86_BUILTIN_SCATTERALTSIV8DI:
36782 icode = CODE_FOR_avx512f_scattersiv8di;
36783 goto scatter_gen;
36784 case IX86_BUILTIN_SCATTERALTDIV16SI:
36785 icode = CODE_FOR_avx512f_scatterdiv16si;
36786 goto scatter_gen;
36787 case IX86_BUILTIN_GATHERPFDPS:
36788 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36789 goto vec_prefetch_gen;
36790 case IX86_BUILTIN_GATHERPFQPD:
36791 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36792 goto vec_prefetch_gen;
36793 case IX86_BUILTIN_GATHERPFQPS:
36794 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36795 goto vec_prefetch_gen;
36796 case IX86_BUILTIN_SCATTERPFDPD:
36797 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36798 goto vec_prefetch_gen;
36799 case IX86_BUILTIN_SCATTERPFDPS:
36800 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36801 goto vec_prefetch_gen;
36802 case IX86_BUILTIN_SCATTERPFQPD:
36803 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36804 goto vec_prefetch_gen;
36805 case IX86_BUILTIN_SCATTERPFQPS:
36806 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36807 goto vec_prefetch_gen;
36808
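/* Illustrative note for the gather expansion below, assuming the usual
   <immintrin.h> wrappers: the masked gather builtins take (merge-src,
   base pointer, index vector, mask, scale), e.g. the AVX-512 form
   behind

     __m512d v = _mm512_i32gather_pd (idx, base, 8);   // scale 1/2/4/8

   The ...ALT variants reuse the same patterns but have an index vector
   with twice as many elements as the data vector (or vice versa), so
   only the low half of the wider operand is used; that is what the
   vec_extract_lo_* calls in the switch below implement.  */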
36809 gather_gen:
36810 rtx half;
36811 rtx (*gen) (rtx, rtx);
36812
36813 arg0 = CALL_EXPR_ARG (exp, 0);
36814 arg1 = CALL_EXPR_ARG (exp, 1);
36815 arg2 = CALL_EXPR_ARG (exp, 2);
36816 arg3 = CALL_EXPR_ARG (exp, 3);
36817 arg4 = CALL_EXPR_ARG (exp, 4);
36818 op0 = expand_normal (arg0);
36819 op1 = expand_normal (arg1);
36820 op2 = expand_normal (arg2);
36821 op3 = expand_normal (arg3);
36822 op4 = expand_normal (arg4);
36823 /* Note the arg order is different from the operand order. */
36824 mode0 = insn_data[icode].operand[1].mode;
36825 mode2 = insn_data[icode].operand[3].mode;
36826 mode3 = insn_data[icode].operand[4].mode;
36827 mode4 = insn_data[icode].operand[5].mode;
36828
36829 if (target == NULL_RTX
36830 || GET_MODE (target) != insn_data[icode].operand[0].mode
36831 || !insn_data[icode].operand[0].predicate (target,
36832 GET_MODE (target)))
36833 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36834 else
36835 subtarget = target;
36836
36837 switch (fcode)
36838 {
36839 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36840 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36841 half = gen_reg_rtx (V8SImode);
36842 if (!nonimmediate_operand (op2, V16SImode))
36843 op2 = copy_to_mode_reg (V16SImode, op2);
36844 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36845 op2 = half;
36846 break;
36847 case IX86_BUILTIN_GATHER3ALTSIV4DF:
36848 case IX86_BUILTIN_GATHER3ALTSIV4DI:
36849 case IX86_BUILTIN_GATHERALTSIV4DF:
36850 case IX86_BUILTIN_GATHERALTSIV4DI:
36851 half = gen_reg_rtx (V4SImode);
36852 if (!nonimmediate_operand (op2, V8SImode))
36853 op2 = copy_to_mode_reg (V8SImode, op2);
36854 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36855 op2 = half;
36856 break;
36857 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36858 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36859 half = gen_reg_rtx (mode0);
36860 if (mode0 == V8SFmode)
36861 gen = gen_vec_extract_lo_v16sf;
36862 else
36863 gen = gen_vec_extract_lo_v16si;
36864 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36865 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36866 emit_insn (gen (half, op0));
36867 op0 = half;
36868 if (GET_MODE (op3) != VOIDmode)
36869 {
36870 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36871 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36872 emit_insn (gen (half, op3));
36873 op3 = half;
36874 }
36875 break;
36876 case IX86_BUILTIN_GATHER3ALTDIV8SF:
36877 case IX86_BUILTIN_GATHER3ALTDIV8SI:
36878 case IX86_BUILTIN_GATHERALTDIV8SF:
36879 case IX86_BUILTIN_GATHERALTDIV8SI:
36880 half = gen_reg_rtx (mode0);
36881 if (mode0 == V4SFmode)
36882 gen = gen_vec_extract_lo_v8sf;
36883 else
36884 gen = gen_vec_extract_lo_v8si;
36885 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36886 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36887 emit_insn (gen (half, op0));
36888 op0 = half;
36889 if (GET_MODE (op3) != VOIDmode)
36890 {
36891 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36892 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36893 emit_insn (gen (half, op3));
36894 op3 = half;
36895 }
36896 break;
36897 default:
36898 break;
36899 }
36900
36901 /* Force memory operand only with base register here. But we
36902 don't want to do it on memory operand for other builtin
36903 functions. */
36904 op1 = ix86_zero_extend_to_Pmode (op1);
36905
36906 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36907 op0 = copy_to_mode_reg (mode0, op0);
36908 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36909 op1 = copy_to_mode_reg (Pmode, op1);
36910 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36911 op2 = copy_to_mode_reg (mode2, op2);
36912
36913 op3 = fixup_modeless_constant (op3, mode3);
36914
36915 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36916 {
36917 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36918 op3 = copy_to_mode_reg (mode3, op3);
36919 }
36920 else
36921 {
36922 op3 = copy_to_reg (op3);
36923 op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
36924 }
36925 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36926 {
36927 error ("the last argument must be scale 1, 2, 4, 8");
36928 return const0_rtx;
36929 }
36930
36931 /* Optimize. If mask is known to have all high bits set,
36932 replace op0 with pc_rtx to signal that the instruction
36933 overwrites the whole destination and doesn't use its
36934 previous contents. */
36935 if (optimize)
36936 {
36937 if (TREE_CODE (arg3) == INTEGER_CST)
36938 {
36939 if (integer_all_onesp (arg3))
36940 op0 = pc_rtx;
36941 }
36942 else if (TREE_CODE (arg3) == VECTOR_CST)
36943 {
36944 unsigned int negative = 0;
36945 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36946 {
36947 tree cst = VECTOR_CST_ELT (arg3, i);
36948 if (TREE_CODE (cst) == INTEGER_CST
36949 && tree_int_cst_sign_bit (cst))
36950 negative++;
36951 else if (TREE_CODE (cst) == REAL_CST
36952 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36953 negative++;
36954 }
36955 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36956 op0 = pc_rtx;
36957 }
36958 else if (TREE_CODE (arg3) == SSA_NAME
36959 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36960 {
36961 /* Recognize also when mask is like:
36962 __v2df src = _mm_setzero_pd ();
36963 __v2df mask = _mm_cmpeq_pd (src, src);
36964 or
36965 __v8sf src = _mm256_setzero_ps ();
36966 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36967 as that is a cheaper way to load all ones into
36968 a register than having to load a constant from
36969 memory. */
36970 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
36971 if (is_gimple_call (def_stmt))
36972 {
36973 tree fndecl = gimple_call_fndecl (def_stmt);
36974 if (fndecl
36975 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36976 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36977 {
36978 case IX86_BUILTIN_CMPPD:
36979 case IX86_BUILTIN_CMPPS:
36980 case IX86_BUILTIN_CMPPD256:
36981 case IX86_BUILTIN_CMPPS256:
36982 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36983 break;
36984 /* FALLTHRU */
36985 case IX86_BUILTIN_CMPEQPD:
36986 case IX86_BUILTIN_CMPEQPS:
36987 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36988 && initializer_zerop (gimple_call_arg (def_stmt,
36989 1)))
36990 op0 = pc_rtx;
36991 break;
36992 default:
36993 break;
36994 }
36995 }
36996 }
36997 }
36998
36999 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
37000 if (! pat)
37001 return const0_rtx;
37002 emit_insn (pat);
37003
37004 switch (fcode)
37005 {
37006 case IX86_BUILTIN_GATHER3DIV16SF:
37007 if (target == NULL_RTX)
37008 target = gen_reg_rtx (V8SFmode);
37009 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
37010 break;
37011 case IX86_BUILTIN_GATHER3DIV16SI:
37012 if (target == NULL_RTX)
37013 target = gen_reg_rtx (V8SImode);
37014 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
37015 break;
37016 case IX86_BUILTIN_GATHER3DIV8SF:
37017 case IX86_BUILTIN_GATHERDIV8SF:
37018 if (target == NULL_RTX)
37019 target = gen_reg_rtx (V4SFmode);
37020 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
37021 break;
37022 case IX86_BUILTIN_GATHER3DIV8SI:
37023 case IX86_BUILTIN_GATHERDIV8SI:
37024 if (target == NULL_RTX)
37025 target = gen_reg_rtx (V4SImode);
37026 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
37027 break;
37028 default:
37029 target = subtarget;
37030 break;
37031 }
37032 return target;
37033
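/* Illustrative note for the scatter expansion below, assuming the usual
   <immintrin.h> wrapper:

     _mm512_i32scatter_pd (base, idx, v, 8);   // scale must be 1/2/4/8

   The operands are (base pointer, writemask, index vector, source
   vector, scale); as with the gathers, the ...ALT variants use only the
   low half of whichever of the index or source vectors is wider.  */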
37034 scatter_gen:
37035 arg0 = CALL_EXPR_ARG (exp, 0);
37036 arg1 = CALL_EXPR_ARG (exp, 1);
37037 arg2 = CALL_EXPR_ARG (exp, 2);
37038 arg3 = CALL_EXPR_ARG (exp, 3);
37039 arg4 = CALL_EXPR_ARG (exp, 4);
37040 op0 = expand_normal (arg0);
37041 op1 = expand_normal (arg1);
37042 op2 = expand_normal (arg2);
37043 op3 = expand_normal (arg3);
37044 op4 = expand_normal (arg4);
37045 mode1 = insn_data[icode].operand[1].mode;
37046 mode2 = insn_data[icode].operand[2].mode;
37047 mode3 = insn_data[icode].operand[3].mode;
37048 mode4 = insn_data[icode].operand[4].mode;
37049
37050 /* Scatter instruction stores operand op3 to memory with
37051 indices from op2 and scale from op4 under writemask op1.
37052 If index operand op2 has more elements than source operand
37053 op3, one needs to use only its low half. And vice versa. */
37054 switch (fcode)
37055 {
37056 case IX86_BUILTIN_SCATTERALTSIV8DF:
37057 case IX86_BUILTIN_SCATTERALTSIV8DI:
37058 half = gen_reg_rtx (V8SImode);
37059 if (!nonimmediate_operand (op2, V16SImode))
37060 op2 = copy_to_mode_reg (V16SImode, op2);
37061 emit_insn (gen_vec_extract_lo_v16si (half, op2));
37062 op2 = half;
37063 break;
37064 case IX86_BUILTIN_SCATTERALTDIV16SF:
37065 case IX86_BUILTIN_SCATTERALTDIV16SI:
37066 half = gen_reg_rtx (mode3);
37067 if (mode3 == V8SFmode)
37068 gen = gen_vec_extract_lo_v16sf;
37069 else
37070 gen = gen_vec_extract_lo_v16si;
37071 if (!nonimmediate_operand (op3, GET_MODE (op3)))
37072 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
37073 emit_insn (gen (half, op3));
37074 op3 = half;
37075 break;
37076 default:
37077 break;
37078 }
37079
37080 /* Force memory operand only with base register here. But we
37081 don't want to do it on memory operand for other builtin
37082 functions. */
37083 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
37084
37085 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37086 op0 = copy_to_mode_reg (Pmode, op0);
37087
37088 op1 = fixup_modeless_constant (op1, mode1);
37089
37090 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
37091 {
37092 if (!insn_data[icode].operand[1].predicate (op1, mode1))
37093 op1 = copy_to_mode_reg (mode1, op1);
37094 }
37095 else
37096 {
37097 op1 = copy_to_reg (op1);
37098 op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
37099 }
37100
37101 if (!insn_data[icode].operand[2].predicate (op2, mode2))
37102 op2 = copy_to_mode_reg (mode2, op2);
37103
37104 if (!insn_data[icode].operand[3].predicate (op3, mode3))
37105 op3 = copy_to_mode_reg (mode3, op3);
37106
37107 if (!insn_data[icode].operand[4].predicate (op4, mode4))
37108 {
37109 error ("the last argument must be scale 1, 2, 4, 8");
37110 return const0_rtx;
37111 }
37112
37113 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
37114 if (! pat)
37115 return const0_rtx;
37116
37117 emit_insn (pat);
37118 return 0;
37119
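/* The AVX-512PF gather/scatter prefetch builtins handled below take a
   mask, an index vector, a base pointer, a scale and a locality hint;
   the scale is checked against 1/2/4/8 and the hint against the
   pattern's predicate, so invalid constants are rejected with an error
   rather than silently accepted.  */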
37120 vec_prefetch_gen:
37121 arg0 = CALL_EXPR_ARG (exp, 0);
37122 arg1 = CALL_EXPR_ARG (exp, 1);
37123 arg2 = CALL_EXPR_ARG (exp, 2);
37124 arg3 = CALL_EXPR_ARG (exp, 3);
37125 arg4 = CALL_EXPR_ARG (exp, 4);
37126 op0 = expand_normal (arg0);
37127 op1 = expand_normal (arg1);
37128 op2 = expand_normal (arg2);
37129 op3 = expand_normal (arg3);
37130 op4 = expand_normal (arg4);
37131 mode0 = insn_data[icode].operand[0].mode;
37132 mode1 = insn_data[icode].operand[1].mode;
37133 mode3 = insn_data[icode].operand[3].mode;
37134 mode4 = insn_data[icode].operand[4].mode;
37135
37136 op0 = fixup_modeless_constant (op0, mode0);
37137
37138 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
37139 {
37140 if (!insn_data[icode].operand[0].predicate (op0, mode0))
37141 op0 = copy_to_mode_reg (mode0, op0);
37142 }
37143 else
37144 {
37145 op0 = copy_to_reg (op0);
37146 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
37147 }
37148
37149 if (!insn_data[icode].operand[1].predicate (op1, mode1))
37150 op1 = copy_to_mode_reg (mode1, op1);
37151
37152 /* Force memory operand only with base register here. But we
37153 don't want to do it on memory operand for other builtin
37154 functions. */
37155 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
37156
37157 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
37158 op2 = copy_to_mode_reg (Pmode, op2);
37159
37160 if (!insn_data[icode].operand[3].predicate (op3, mode3))
37161 {
37162 error ("the forth argument must be scale 1, 2, 4, 8");
37163 return const0_rtx;
37164 }
37165
37166 if (!insn_data[icode].operand[4].predicate (op4, mode4))
37167 {
37168 error ("incorrect hint operand");
37169 return const0_rtx;
37170 }
37171
37172 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
37173 if (! pat)
37174 return const0_rtx;
37175
37176 emit_insn (pat);
37177
37178 return 0;
37179
37180 case IX86_BUILTIN_XABORT:
37181 icode = CODE_FOR_xabort;
37182 arg0 = CALL_EXPR_ARG (exp, 0);
37183 op0 = expand_normal (arg0);
37184 mode0 = insn_data[icode].operand[0].mode;
37185 if (!insn_data[icode].operand[0].predicate (op0, mode0))
37186 {
37187 error ("the xabort's argument must be an 8-bit immediate");
37188 return const0_rtx;
37189 }
37190 emit_insn (gen_xabort (op0));
37191 return 0;
37192
37193 case IX86_BUILTIN_RSTORSSP:
37194 case IX86_BUILTIN_CLRSSBSY:
37195 arg0 = CALL_EXPR_ARG (exp, 0);
37196 op0 = expand_normal (arg0);
37197 icode = (fcode == IX86_BUILTIN_RSTORSSP
37198 ? CODE_FOR_rstorssp
37199 : CODE_FOR_clrssbsy);
37200 if (!address_operand (op0, VOIDmode))
37201 {
37202 op1 = convert_memory_address (Pmode, op0);
37203 op0 = copy_addr_to_reg (op1);
37204 }
37205 emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
37206 return 0;
37207
37208 case IX86_BUILTIN_WRSSD:
37209 case IX86_BUILTIN_WRSSQ:
37210 case IX86_BUILTIN_WRUSSD:
37211 case IX86_BUILTIN_WRUSSQ:
37212 arg0 = CALL_EXPR_ARG (exp, 0);
37213 op0 = expand_normal (arg0);
37214 arg1 = CALL_EXPR_ARG (exp, 1);
37215 op1 = expand_normal (arg1);
37216 switch (fcode)
37217 {
37218 case IX86_BUILTIN_WRSSD:
37219 icode = CODE_FOR_wrsssi;
37220 mode = SImode;
37221 break;
37222 case IX86_BUILTIN_WRSSQ:
37223 icode = CODE_FOR_wrssdi;
37224 mode = DImode;
37225 break;
37226 case IX86_BUILTIN_WRUSSD:
37227 icode = CODE_FOR_wrusssi;
37228 mode = SImode;
37229 break;
37230 case IX86_BUILTIN_WRUSSQ:
37231 icode = CODE_FOR_wrussdi;
37232 mode = DImode;
37233 break;
37234 }
37235 op0 = force_reg (mode, op0);
37236 if (!address_operand (op1, VOIDmode))
37237 {
37238 op2 = convert_memory_address (Pmode, op1);
37239 op1 = copy_addr_to_reg (op2);
37240 }
37241 emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
37242 return 0;
37243
37244 default:
37245 break;
37246 }
37247
37248 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
37249 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
37250 {
37251 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
37252 return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
37253 target);
37254 }
37255
37256 if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
37257 && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
37258 {
37259 i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
37260 switch (fcode)
37261 {
37262 case IX86_BUILTIN_FABSQ:
37263 case IX86_BUILTIN_COPYSIGNQ:
37264 if (!TARGET_SSE)
37265 /* Emit a normal call if SSE isn't available. */
37266 return expand_call (exp, target, ignore);
37267 /* FALLTHRU */
37268 default:
37269 return ix86_expand_args_builtin (bdesc_args + i, exp, target);
37270 }
37271 }
37272
37273 if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
37274 && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
37275 {
37276 i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
37277 rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
37278 rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
37279 rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
37280 int masked = 1;
37281 machine_mode mode, wide_mode, nar_mode;
37282
37283 nar_mode = V4SFmode;
37284 mode = V16SFmode;
37285 wide_mode = V64SFmode;
37286 fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
37287 fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
37288
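/* The AVX512_4FMAPS/4VNNIW builtins below consume four consecutive
   vector registers.  The expansion models that group as one wide
   V64SF/V64SI pseudo built from four subreg stores, copies the
   accumulator into the result register, wraps the memory argument in a
   narrow V4SF/V4SI MEM, and for the _MASK forms chooses between the
   zero-masked and merge-masked patterns depending on whether the merge
   operand is literal zero or the same expression as the accumulator.  */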
37289 switch (fcode)
37290 {
37291 case IX86_BUILTIN_4FMAPS:
37292 fcn = gen_avx5124fmaddps_4fmaddps;
37293 masked = 0;
37294 goto v4fma_expand;
37295
37296 case IX86_BUILTIN_4DPWSSD:
37297 nar_mode = V4SImode;
37298 mode = V16SImode;
37299 wide_mode = V64SImode;
37300 fcn = gen_avx5124vnniw_vp4dpwssd;
37301 masked = 0;
37302 goto v4fma_expand;
37303
37304 case IX86_BUILTIN_4DPWSSDS:
37305 nar_mode = V4SImode;
37306 mode = V16SImode;
37307 wide_mode = V64SImode;
37308 fcn = gen_avx5124vnniw_vp4dpwssds;
37309 masked = 0;
37310 goto v4fma_expand;
37311
37312 case IX86_BUILTIN_4FNMAPS:
37313 fcn = gen_avx5124fmaddps_4fnmaddps;
37314 masked = 0;
37315 goto v4fma_expand;
37316
37317 case IX86_BUILTIN_4FNMAPS_MASK:
37318 fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
37319 fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
37320 goto v4fma_expand;
37321
37322 case IX86_BUILTIN_4DPWSSD_MASK:
37323 nar_mode = V4SImode;
37324 mode = V16SImode;
37325 wide_mode = V64SImode;
37326 fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
37327 fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
37328 goto v4fma_expand;
37329
37330 case IX86_BUILTIN_4DPWSSDS_MASK:
37331 nar_mode = V4SImode;
37332 mode = V16SImode;
37333 wide_mode = V64SImode;
37334 fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
37335 fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
37336 goto v4fma_expand;
37337
37338 case IX86_BUILTIN_4FMAPS_MASK:
37339 {
37340 tree args[4];
37341 rtx ops[4];
37342 rtx wide_reg;
37343 rtx accum;
37344 rtx addr;
37345 rtx mem;
37346
37347 v4fma_expand:
37348 wide_reg = gen_reg_rtx (wide_mode);
37349 for (i = 0; i < 4; i++)
37350 {
37351 args[i] = CALL_EXPR_ARG (exp, i);
37352 ops[i] = expand_normal (args[i]);
37353
37354 emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
37355 ops[i]);
37356 }
37357
37358 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
37359 accum = force_reg (mode, accum);
37360
37361 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
37362 addr = force_reg (Pmode, addr);
37363
37364 mem = gen_rtx_MEM (nar_mode, addr);
37365
37366 target = gen_reg_rtx (mode);
37367
37368 emit_move_insn (target, accum);
37369
37370 if (! masked)
37371 emit_insn (fcn (target, accum, wide_reg, mem));
37372 else
37373 {
37374 rtx merge, mask;
37375 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
37376
37377 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
37378
37379 if (CONST_INT_P (mask))
37380 mask = fixup_modeless_constant (mask, HImode);
37381
37382 mask = force_reg (HImode, mask);
37383
37384 if (GET_MODE (mask) != HImode)
37385 mask = gen_rtx_SUBREG (HImode, mask, 0);
37386
37387 /* If merge is 0 then we're about to emit z-masked variant. */
37388 if (const0_operand (merge, mode))
37389 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
37390 /* If merge is the same as accum then emit merge-masked variant. */
37391 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
37392 {
37393 merge = force_reg (mode, merge);
37394 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
37395 }
37396 /* Merge with something unknown might happen if we z-mask w/ -O0. */
37397 else
37398 {
37399 target = gen_reg_rtx (mode);
37400 emit_move_insn (target, merge);
37401 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
37402 }
37403 }
37404 return target;
37405 }
37406
37407 case IX86_BUILTIN_4FNMASS:
37408 fcn = gen_avx5124fmaddps_4fnmaddss;
37409 masked = 0;
37410 goto s4fma_expand;
37411
37412 case IX86_BUILTIN_4FMASS:
37413 fcn = gen_avx5124fmaddps_4fmaddss;
37414 masked = 0;
37415 goto s4fma_expand;
37416
37417 case IX86_BUILTIN_4FNMASS_MASK:
37418 fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
37419 fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
37420 goto s4fma_expand;
37421
37422 case IX86_BUILTIN_4FMASS_MASK:
37423 {
37424 tree args[4];
37425 rtx ops[4];
37426 rtx wide_reg;
37427 rtx accum;
37428 rtx addr;
37429 rtx mem;
37430
37431 fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
37432 fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
37433
37434 s4fma_expand:
37435 mode = V4SFmode;
37436 wide_reg = gen_reg_rtx (V64SFmode);
37437 for (i = 0; i < 4; i++)
37438 {
37439 rtx tmp;
37440 args[i] = CALL_EXPR_ARG (exp, i);
37441 ops[i] = expand_normal (args[i]);
37442
37443 tmp = gen_reg_rtx (SFmode);
37444 emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
37445
37446 emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
37447 gen_rtx_SUBREG (V16SFmode, tmp, 0));
37448 }
37449
37450 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
37451 accum = force_reg (V4SFmode, accum);
37452
37453 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
37454 addr = force_reg (Pmode, addr);
37455
37456 mem = gen_rtx_MEM (V4SFmode, addr);
37457
37458 target = gen_reg_rtx (V4SFmode);
37459
37460 emit_move_insn (target, accum);
37461
37462 if (! masked)
37463 emit_insn (fcn (target, accum, wide_reg, mem));
37464 else
37465 {
37466 rtx merge, mask;
37467 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
37468
37469 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
37470
37471 if (CONST_INT_P (mask))
37472 mask = fixup_modeless_constant (mask, QImode);
37473
37474 mask = force_reg (QImode, mask);
37475
37476 if (GET_MODE (mask) != QImode)
37477 mask = gen_rtx_SUBREG (QImode, mask, 0);
37478
37479 /* If merge is 0 then we're about to emit z-masked variant. */
37480 if (const0_operand (merge, mode))
37481 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
37482 /* If merge is the same as accum then emit merge-masked
37483 variant. */
37484 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
37485 {
37486 merge = force_reg (mode, merge);
37487 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
37488 }
37489 /* Merge with something unknown might happen if we z-mask
37490 w/ -O0. */
37491 else
37492 {
37493 target = gen_reg_rtx (mode);
37494 emit_move_insn (target, merge);
37495 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
37496 }
37497 }
37498 return target;
37499 }
37500 case IX86_BUILTIN_RDPID:
37501 return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
37502 target);
37503 default:
37504 return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
37505 }
37506 }
37507
37508 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST
37509 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST)
37510 {
37511 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST;
37512 return ix86_expand_special_args_builtin (bdesc_special_args2 + i, exp,
37513 target);
37514 }
37515
37516 if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
37517 && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
37518 {
37519 i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
37520 return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
37521 }
37522
37523 if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
37524 && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
37525 {
37526 i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
37527 return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
37528 }
37529
37530 if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
37531 && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
37532 {
37533 i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
37534 return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
37535 }
37536
37537 if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
37538 && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
37539 {
37540 i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
37541 return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
37542 }
37543
37544 if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
37545 && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
37546 {
37547 i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
37548 const struct builtin_description *d = bdesc_multi_arg + i;
37549 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
37550 (enum ix86_builtin_func_type)
37551 d->flag, d->comparison);
37552 }
37553
37554 if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
37555 && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
37556 {
37557 i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
37558 return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
37559 target);
37560 }
37561
37562 if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
37563 && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
37564 {
37565 i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
37566 return ix86_expand_args_builtin (bdesc_cet_rdssp + i, exp,
37567 target);
37568 }
37569
37570 gcc_unreachable ();
37571 }
37572
37573 /* This returns the target-specific builtin with code CODE if
37574 current_function_decl has visibility on this builtin, which is checked
37575 using isa flags. Returns NULL_TREE otherwise. */
37576
37577 static tree ix86_get_builtin (enum ix86_builtins code)
37578 {
37579 struct cl_target_option *opts;
37580 tree target_tree = NULL_TREE;
37581
37582 /* Determine the isa flags of current_function_decl. */
37583
37584 if (current_function_decl)
37585 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
37586
37587 if (target_tree == NULL)
37588 target_tree = target_option_default_node;
37589
37590 opts = TREE_TARGET_OPTION (target_tree);
37591
37592 if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
37593 || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
37594 return ix86_builtin_decl (code, true);
37595 else
37596 return NULL_TREE;
37597 }
37598
37599 /* Return the function decl for the target-specific builtin
37600 corresponding to the given MPX builtin passed in FCODE. */
37601 static tree
37602 ix86_builtin_mpx_function (unsigned fcode)
37603 {
37604 switch (fcode)
37605 {
37606 case BUILT_IN_CHKP_BNDMK:
37607 return ix86_builtins[IX86_BUILTIN_BNDMK];
37608
37609 case BUILT_IN_CHKP_BNDSTX:
37610 return ix86_builtins[IX86_BUILTIN_BNDSTX];
37611
37612 case BUILT_IN_CHKP_BNDLDX:
37613 return ix86_builtins[IX86_BUILTIN_BNDLDX];
37614
37615 case BUILT_IN_CHKP_BNDCL:
37616 return ix86_builtins[IX86_BUILTIN_BNDCL];
37617
37618 case BUILT_IN_CHKP_BNDCU:
37619 return ix86_builtins[IX86_BUILTIN_BNDCU];
37620
37621 case BUILT_IN_CHKP_BNDRET:
37622 return ix86_builtins[IX86_BUILTIN_BNDRET];
37623
37624 case BUILT_IN_CHKP_INTERSECT:
37625 return ix86_builtins[IX86_BUILTIN_BNDINT];
37626
37627 case BUILT_IN_CHKP_NARROW:
37628 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
37629
37630 case BUILT_IN_CHKP_SIZEOF:
37631 return ix86_builtins[IX86_BUILTIN_SIZEOF];
37632
37633 case BUILT_IN_CHKP_EXTRACT_LOWER:
37634 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
37635
37636 case BUILT_IN_CHKP_EXTRACT_UPPER:
37637 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
37638
37639 default:
37640 return NULL_TREE;
37641 }
37642
37643 gcc_unreachable ();
37644 }
37645
37646 /* Helper function for ix86_load_bounds and ix86_store_bounds.
37647
37648 Return an address to be used to load/store bounds for pointer
37649 passed in SLOT.
37650
37651 SLOT_NO is an integer constant holding number of a target
37652 dependent special slot to be used in case SLOT is not a memory.
37653
37654 SPECIAL_BASE is a pointer to be used as a base of fake address
37655 to access special slots in Bounds Table. SPECIAL_BASE[-1],
37656 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
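/* For example, with 64-bit pointers slot_no 0 maps to
   special_base - 8, slot_no 1 to special_base - 16, and so on,
   matching the -(slot_no + 1) * sizeof (void *) computation below.  */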
37657
37658 static rtx
37659 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
37660 {
37661 rtx addr = NULL;
37662
37663 /* A NULL slot means we pass bounds for a pointer that is not passed
37664 to the function at all. A register slot means we pass the pointer
37665 in a register. In both cases the bounds are passed via the Bounds
37666 Table. Since we do not have an actual pointer stored in memory,
37667 we have to use fake addresses to access the Bounds Table. We
37668 start with (special_base - sizeof (void*)) and decrease this
37669 address by the pointer size to get addresses for the other slots. */
37670 if (!slot || REG_P (slot))
37671 {
37672 gcc_assert (CONST_INT_P (slot_no));
37673 addr = plus_constant (Pmode, special_base,
37674 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
37675 }
37676 /* If pointer is passed in a memory then its address is used to
37677 access Bounds Table. */
37678 else if (MEM_P (slot))
37679 {
37680 addr = XEXP (slot, 0);
37681 if (!register_operand (addr, Pmode))
37682 addr = copy_addr_to_reg (addr);
37683 }
37684 else
37685 gcc_unreachable ();
37686
37687 return addr;
37688 }
37689
37690 /* Expand pass uses this hook to load bounds for function parameter
37691 PTR passed in SLOT in case its bounds are not passed in a register.
37692
37693 If SLOT is a memory, then bounds are loaded as for regular pointer
37694 loaded from memory. PTR may be NULL in case SLOT is a memory.
37695 In that case the value of PTR (if required) may be loaded from SLOT.
37696
37697 If SLOT is NULL or a register then SLOT_NO is an integer constant
37698 holding number of the target dependent special slot which should be
37699 used to obtain bounds.
37700
37701 Return loaded bounds. */
37702
37703 static rtx
37704 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
37705 {
37706 rtx reg = gen_reg_rtx (BNDmode);
37707 rtx addr;
37708
37709 /* Get address to be used to access Bounds Table. Special slots start
37710 at the location of return address of the current function. */
37711 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
37712
37713 /* Load pointer value from a memory if we don't have it. */
37714 if (!ptr)
37715 {
37716 gcc_assert (MEM_P (slot));
37717 ptr = copy_addr_to_reg (slot);
37718 }
37719
37720 if (!register_operand (ptr, Pmode))
37721 ptr = ix86_zero_extend_to_Pmode (ptr);
37722
37723 emit_insn (BNDmode == BND64mode
37724 ? gen_bnd64_ldx (reg, addr, ptr)
37725 : gen_bnd32_ldx (reg, addr, ptr));
37726
37727 return reg;
37728 }
37729
37730 /* Expand pass uses this hook to store BOUNDS for call argument PTR
37731 passed in SLOT in case BOUNDS are not passed in a register.
37732
37733 If SLOT is a memory, then BOUNDS are stored as for regular pointer
37734 stored in memory. PTR may be NULL in case SLOT is a memory.
37735 In that case the value of PTR (if required) may be loaded from SLOT.
37736
37737 If SLOT is NULL or a register then SLOT_NO is an integer constant
37738 holding number of the target dependent special slot which should be
37739 used to store BOUNDS. */
37740
37741 static void
37742 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
37743 {
37744 rtx addr;
37745
37746 /* Get address to be used to access Bounds Table. Special slots start
37747 at the location of return address of a called function. */
37748 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
37749
37750 /* Load pointer value from a memory if we don't have it. */
37751 if (!ptr)
37752 {
37753 gcc_assert (MEM_P (slot));
37754 ptr = copy_addr_to_reg (slot);
37755 }
37756
37757 if (!register_operand (ptr, Pmode))
37758 ptr = ix86_zero_extend_to_Pmode (ptr);
37759
37760 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
37761 if (!register_operand (bounds, BNDmode))
37762 bounds = copy_to_mode_reg (BNDmode, bounds);
37763
37764 emit_insn (BNDmode == BND64mode
37765 ? gen_bnd64_stx (addr, ptr, bounds)
37766 : gen_bnd32_stx (addr, ptr, bounds));
37767 }
37768
37769 /* Load and return bounds returned by function in SLOT. */
37770
37771 static rtx
37772 ix86_load_returned_bounds (rtx slot)
37773 {
37774 rtx res;
37775
37776 gcc_assert (REG_P (slot));
37777 res = gen_reg_rtx (BNDmode);
37778 emit_move_insn (res, slot);
37779
37780 return res;
37781 }
37782
37783 /* Store BOUNDS returned by function into SLOT. */
37784
37785 static void
37786 ix86_store_returned_bounds (rtx slot, rtx bounds)
37787 {
37788 gcc_assert (REG_P (slot));
37789 emit_move_insn (slot, bounds);
37790 }
37791
37792 /* Returns a function decl for a vectorized version of the combined function
37793 with combined_fn code FN, result vector type TYPE_OUT and argument vector
37794 type TYPE_IN, or NULL_TREE if it is not available. */
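/* For example, with SSE4.1 enabled and -fno-trapping-math, vectorizing
floor () with a V2DF argument and result (DFmode element, two lanes)
resolves to IX86_BUILTIN_FLOORPD in the FLOOR case below. */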
37795
37796 static tree
37797 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
37798 tree type_in)
37799 {
37800 machine_mode in_mode, out_mode;
37801 int in_n, out_n;
37802
37803 if (TREE_CODE (type_out) != VECTOR_TYPE
37804 || TREE_CODE (type_in) != VECTOR_TYPE)
37805 return NULL_TREE;
37806
37807 out_mode = TYPE_MODE (TREE_TYPE (type_out));
37808 out_n = TYPE_VECTOR_SUBPARTS (type_out);
37809 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37810 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37811
37812 switch (fn)
37813 {
37814 CASE_CFN_EXP2:
37815 if (out_mode == SFmode && in_mode == SFmode)
37816 {
37817 if (out_n == 16 && in_n == 16)
37818 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
37819 }
37820 break;
37821
37822 CASE_CFN_IFLOOR:
37823 CASE_CFN_LFLOOR:
37824 CASE_CFN_LLFLOOR:
37825 /* The round insn does not trap on denormals. */
37826 if (flag_trapping_math || !TARGET_SSE4_1)
37827 break;
37828
37829 if (out_mode == SImode && in_mode == DFmode)
37830 {
37831 if (out_n == 4 && in_n == 2)
37832 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
37833 else if (out_n == 8 && in_n == 4)
37834 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
37835 else if (out_n == 16 && in_n == 8)
37836 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
37837 }
37838 if (out_mode == SImode && in_mode == SFmode)
37839 {
37840 if (out_n == 4 && in_n == 4)
37841 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
37842 else if (out_n == 8 && in_n == 8)
37843 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
37844 else if (out_n == 16 && in_n == 16)
37845 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
37846 }
37847 break;
37848
37849 CASE_CFN_ICEIL:
37850 CASE_CFN_LCEIL:
37851 CASE_CFN_LLCEIL:
37852 /* The round insn does not trap on denormals. */
37853 if (flag_trapping_math || !TARGET_SSE4_1)
37854 break;
37855
37856 if (out_mode == SImode && in_mode == DFmode)
37857 {
37858 if (out_n == 4 && in_n == 2)
37859 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
37860 else if (out_n == 8 && in_n == 4)
37861 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
37862 else if (out_n == 16 && in_n == 8)
37863 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
37864 }
37865 if (out_mode == SImode && in_mode == SFmode)
37866 {
37867 if (out_n == 4 && in_n == 4)
37868 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
37869 else if (out_n == 8 && in_n == 8)
37870 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
37871 else if (out_n == 16 && in_n == 16)
37872 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
37873 }
37874 break;
37875
37876 CASE_CFN_IRINT:
37877 CASE_CFN_LRINT:
37878 CASE_CFN_LLRINT:
37879 if (out_mode == SImode && in_mode == DFmode)
37880 {
37881 if (out_n == 4 && in_n == 2)
37882 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
37883 else if (out_n == 8 && in_n == 4)
37884 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
37885 else if (out_n == 16 && in_n == 8)
37886 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
37887 }
37888 if (out_mode == SImode && in_mode == SFmode)
37889 {
37890 if (out_n == 4 && in_n == 4)
37891 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
37892 else if (out_n == 8 && in_n == 8)
37893 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
37894 else if (out_n == 16 && in_n == 16)
37895 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
37896 }
37897 break;
37898
37899 CASE_CFN_IROUND:
37900 CASE_CFN_LROUND:
37901 CASE_CFN_LLROUND:
37902 /* The round insn does not trap on denormals. */
37903 if (flag_trapping_math || !TARGET_SSE4_1)
37904 break;
37905
37906 if (out_mode == SImode && in_mode == DFmode)
37907 {
37908 if (out_n == 4 && in_n == 2)
37909 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
37910 else if (out_n == 8 && in_n == 4)
37911 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
37912 else if (out_n == 16 && in_n == 8)
37913 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
37914 }
37915 if (out_mode == SImode && in_mode == SFmode)
37916 {
37917 if (out_n == 4 && in_n == 4)
37918 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
37919 else if (out_n == 8 && in_n == 8)
37920 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
37921 else if (out_n == 16 && in_n == 16)
37922 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
37923 }
37924 break;
37925
37926 CASE_CFN_FLOOR:
37927 /* The round insn does not trap on denormals. */
37928 if (flag_trapping_math || !TARGET_SSE4_1)
37929 break;
37930
37931 if (out_mode == DFmode && in_mode == DFmode)
37932 {
37933 if (out_n == 2 && in_n == 2)
37934 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
37935 else if (out_n == 4 && in_n == 4)
37936 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
37937 else if (out_n == 8 && in_n == 8)
37938 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
37939 }
37940 if (out_mode == SFmode && in_mode == SFmode)
37941 {
37942 if (out_n == 4 && in_n == 4)
37943 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
37944 else if (out_n == 8 && in_n == 8)
37945 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
37946 else if (out_n == 16 && in_n == 16)
37947 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
37948 }
37949 break;
37950
37951 CASE_CFN_CEIL:
37952 /* The round insn does not trap on denormals. */
37953 if (flag_trapping_math || !TARGET_SSE4_1)
37954 break;
37955
37956 if (out_mode == DFmode && in_mode == DFmode)
37957 {
37958 if (out_n == 2 && in_n == 2)
37959 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
37960 else if (out_n == 4 && in_n == 4)
37961 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
37962 else if (out_n == 8 && in_n == 8)
37963 return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
37964 }
37965 if (out_mode == SFmode && in_mode == SFmode)
37966 {
37967 if (out_n == 4 && in_n == 4)
37968 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
37969 else if (out_n == 8 && in_n == 8)
37970 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
37971 else if (out_n == 16 && in_n == 16)
37972 return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
37973 }
37974 break;
37975
37976 CASE_CFN_TRUNC:
37977 /* The round insn does not trap on denormals. */
37978 if (flag_trapping_math || !TARGET_SSE4_1)
37979 break;
37980
37981 if (out_mode == DFmode && in_mode == DFmode)
37982 {
37983 if (out_n == 2 && in_n == 2)
37984 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
37985 else if (out_n == 4 && in_n == 4)
37986 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
37987 else if (out_n == 8 && in_n == 8)
37988 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
37989 }
37990 if (out_mode == SFmode && in_mode == SFmode)
37991 {
37992 if (out_n == 4 && in_n == 4)
37993 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
37994 else if (out_n == 8 && in_n == 8)
37995 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
37996 else if (out_n == 16 && in_n == 16)
37997 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
37998 }
37999 break;
38000
38001 CASE_CFN_RINT:
38002 /* The round insn does not trap on denormals. */
38003 if (flag_trapping_math || !TARGET_SSE4_1)
38004 break;
38005
38006 if (out_mode == DFmode && in_mode == DFmode)
38007 {
38008 if (out_n == 2 && in_n == 2)
38009 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
38010 else if (out_n == 4 && in_n == 4)
38011 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
38012 }
38013 if (out_mode == SFmode && in_mode == SFmode)
38014 {
38015 if (out_n == 4 && in_n == 4)
38016 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
38017 else if (out_n == 8 && in_n == 8)
38018 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
38019 }
38020 break;
38021
38022 CASE_CFN_FMA:
38023 if (out_mode == DFmode && in_mode == DFmode)
38024 {
38025 if (out_n == 2 && in_n == 2)
38026 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
38027 if (out_n == 4 && in_n == 4)
38028 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
38029 }
38030 if (out_mode == SFmode && in_mode == SFmode)
38031 {
38032 if (out_n == 4 && in_n == 4)
38033 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
38034 if (out_n == 8 && in_n == 8)
38035 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
38036 }
38037 break;
38038
38039 default:
38040 break;
38041 }
38042
38043 /* Dispatch to a handler for a vectorization library. */
38044 if (ix86_veclib_handler)
38045 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
38046
38047 return NULL_TREE;
38048 }
38049
38050 /* Handler for an SVML-style interface to
38051 a library with vectorized intrinsics. */
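/* A minimal user-level sketch, not part of this file (the function f is
illustrative and assumes #include <math.h>): compiled with
-O3 -ffast-math -mveclibabi=svml, a loop such as

void f (float *a, int n)
{
for (int i = 0; i < n; i++)
a[i] = sinf (a[i]);
}

may be vectorized into calls to the SVML routine whose decl is built
below ("vmlsSin4" for four floats). */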
38052
38053 static tree
38054 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
38055 {
38056 char name[20];
38057 tree fntype, new_fndecl, args;
38058 unsigned arity;
38059 const char *bname;
38060 machine_mode el_mode, in_mode;
38061 int n, in_n;
38062
38063 /* SVML is suitable for unsafe math only. */
38064 if (!flag_unsafe_math_optimizations)
38065 return NULL_TREE;
38066
38067 el_mode = TYPE_MODE (TREE_TYPE (type_out));
38068 n = TYPE_VECTOR_SUBPARTS (type_out);
38069 in_mode = TYPE_MODE (TREE_TYPE (type_in));
38070 in_n = TYPE_VECTOR_SUBPARTS (type_in);
38071 if (el_mode != in_mode
38072 || n != in_n)
38073 return NULL_TREE;
38074
38075 switch (fn)
38076 {
38077 CASE_CFN_EXP:
38078 CASE_CFN_LOG:
38079 CASE_CFN_LOG10:
38080 CASE_CFN_POW:
38081 CASE_CFN_TANH:
38082 CASE_CFN_TAN:
38083 CASE_CFN_ATAN:
38084 CASE_CFN_ATAN2:
38085 CASE_CFN_ATANH:
38086 CASE_CFN_CBRT:
38087 CASE_CFN_SINH:
38088 CASE_CFN_SIN:
38089 CASE_CFN_ASINH:
38090 CASE_CFN_ASIN:
38091 CASE_CFN_COSH:
38092 CASE_CFN_COS:
38093 CASE_CFN_ACOSH:
38094 CASE_CFN_ACOS:
38095 if ((el_mode != DFmode || n != 2)
38096 && (el_mode != SFmode || n != 4))
38097 return NULL_TREE;
38098 break;
38099
38100 default:
38101 return NULL_TREE;
38102 }
38103
38104 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
38105 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
38106
38107 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
38108 strcpy (name, "vmlsLn4");
38109 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
38110 strcpy (name, "vmldLn2");
38111 else if (n == 4)
38112 {
38113 sprintf (name, "vmls%s", bname+10);
38114 name[strlen (name)-1] = '4';
38115 }
38116 else
38117 sprintf (name, "vmld%s2", bname+10);
38118
38119 /* Convert to uppercase. */
38120 name[4] &= ~0x20;
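/* Worked examples of the mangling above: "__builtin_sinf" with n == 4
becomes "vmlsSin4", "__builtin_sin" with n == 2 becomes "vmldSin2", and
log is special-cased to "vmlsLn4" / "vmldLn2". */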
38121
38122 arity = 0;
38123 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
38124 arity++;
38125
38126 if (arity == 1)
38127 fntype = build_function_type_list (type_out, type_in, NULL);
38128 else
38129 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
38130
38131 /* Build a function declaration for the vectorized function. */
38132 new_fndecl = build_decl (BUILTINS_LOCATION,
38133 FUNCTION_DECL, get_identifier (name), fntype);
38134 TREE_PUBLIC (new_fndecl) = 1;
38135 DECL_EXTERNAL (new_fndecl) = 1;
38136 DECL_IS_NOVOPS (new_fndecl) = 1;
38137 TREE_READONLY (new_fndecl) = 1;
38138
38139 return new_fndecl;
38140 }
38141
38142 /* Handler for an ACML-style interface to
38143 a library with vectorized intrinsics. */
38144
38145 static tree
38146 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
38147 {
38148 char name[20] = "__vr.._";
38149 tree fntype, new_fndecl, args;
38150 unsigned arity;
38151 const char *bname;
38152 machine_mode el_mode, in_mode;
38153 int n, in_n;
38154
38155 /* ACML is 64-bit only and suitable for unsafe math only, as it does
38156 not correctly support parts of IEEE arithmetic, such as denormals,
38157 with the required precision. */
38158 if (!TARGET_64BIT
38159 || !flag_unsafe_math_optimizations)
38160 return NULL_TREE;
38161
38162 el_mode = TYPE_MODE (TREE_TYPE (type_out));
38163 n = TYPE_VECTOR_SUBPARTS (type_out);
38164 in_mode = TYPE_MODE (TREE_TYPE (type_in));
38165 in_n = TYPE_VECTOR_SUBPARTS (type_in);
38166 if (el_mode != in_mode
38167 || n != in_n)
38168 return NULL_TREE;
38169
38170 switch (fn)
38171 {
38172 CASE_CFN_SIN:
38173 CASE_CFN_COS:
38174 CASE_CFN_EXP:
38175 CASE_CFN_LOG:
38176 CASE_CFN_LOG2:
38177 CASE_CFN_LOG10:
38178 if (el_mode == DFmode && n == 2)
38179 {
38180 name[4] = 'd';
38181 name[5] = '2';
38182 }
38183 else if (el_mode == SFmode && n == 4)
38184 {
38185 name[4] = 's';
38186 name[5] = '4';
38187 }
38188 else
38189 return NULL_TREE;
38190 break;
38191
38192 default:
38193 return NULL_TREE;
38194 }
38195
38196 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
38197 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
38198 sprintf (name + 7, "%s", bname+10);
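/* Worked examples: sin over V2DF yields "__vrd2_sin" and sinf over
V4SF yields "__vrs4_sinf". */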
38199
38200 arity = 0;
38201 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
38202 arity++;
38203
38204 if (arity == 1)
38205 fntype = build_function_type_list (type_out, type_in, NULL);
38206 else
38207 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
38208
38209 /* Build a function declaration for the vectorized function. */
38210 new_fndecl = build_decl (BUILTINS_LOCATION,
38211 FUNCTION_DECL, get_identifier (name), fntype);
38212 TREE_PUBLIC (new_fndecl) = 1;
38213 DECL_EXTERNAL (new_fndecl) = 1;
38214 DECL_IS_NOVOPS (new_fndecl) = 1;
38215 TREE_READONLY (new_fndecl) = 1;
38216
38217 return new_fndecl;
38218 }
38219
38220 /* Returns a decl of a function that implements gather load with
38221 memory type MEM_VECTYPE, index type INDEX_TYPE and scale factor SCALE.
38222 Return NULL_TREE if it is not available. */
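/* For example, a V4DF gather with a 32-bit (SImode) index on plain AVX2
(no AVX512VL) selects IX86_BUILTIN_GATHERALTSIV4DF below, while a DImode
index selects IX86_BUILTIN_GATHERDIV4DF. */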
38223
38224 static tree
38225 ix86_vectorize_builtin_gather (const_tree mem_vectype,
38226 const_tree index_type, int scale)
38227 {
38228 bool si;
38229 enum ix86_builtins code;
38230
38231 if (! TARGET_AVX2)
38232 return NULL_TREE;
38233
38234 if ((TREE_CODE (index_type) != INTEGER_TYPE
38235 && !POINTER_TYPE_P (index_type))
38236 || (TYPE_MODE (index_type) != SImode
38237 && TYPE_MODE (index_type) != DImode))
38238 return NULL_TREE;
38239
38240 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
38241 return NULL_TREE;
38242
38243 /* v*gather* insn sign extends index to pointer mode. */
38244 if (TYPE_PRECISION (index_type) < POINTER_SIZE
38245 && TYPE_UNSIGNED (index_type))
38246 return NULL_TREE;
38247
38248 if (scale <= 0
38249 || scale > 8
38250 || (scale & (scale - 1)) != 0)
38251 return NULL_TREE;
38252
38253 si = TYPE_MODE (index_type) == SImode;
38254 switch (TYPE_MODE (mem_vectype))
38255 {
38256 case E_V2DFmode:
38257 if (TARGET_AVX512VL)
38258 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
38259 else
38260 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
38261 break;
38262 case E_V4DFmode:
38263 if (TARGET_AVX512VL)
38264 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
38265 else
38266 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
38267 break;
38268 case E_V2DImode:
38269 if (TARGET_AVX512VL)
38270 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
38271 else
38272 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
38273 break;
38274 case E_V4DImode:
38275 if (TARGET_AVX512VL)
38276 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
38277 else
38278 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
38279 break;
38280 case E_V4SFmode:
38281 if (TARGET_AVX512VL)
38282 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
38283 else
38284 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
38285 break;
38286 case E_V8SFmode:
38287 if (TARGET_AVX512VL)
38288 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
38289 else
38290 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
38291 break;
38292 case E_V4SImode:
38293 if (TARGET_AVX512VL)
38294 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
38295 else
38296 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
38297 break;
38298 case E_V8SImode:
38299 if (TARGET_AVX512VL)
38300 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
38301 else
38302 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
38303 break;
38304 case E_V8DFmode:
38305 if (TARGET_AVX512F)
38306 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
38307 else
38308 return NULL_TREE;
38309 break;
38310 case E_V8DImode:
38311 if (TARGET_AVX512F)
38312 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
38313 else
38314 return NULL_TREE;
38315 break;
38316 case E_V16SFmode:
38317 if (TARGET_AVX512F)
38318 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
38319 else
38320 return NULL_TREE;
38321 break;
38322 case E_V16SImode:
38323 if (TARGET_AVX512F)
38324 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
38325 else
38326 return NULL_TREE;
38327 break;
38328 default:
38329 return NULL_TREE;
38330 }
38331
38332 return ix86_get_builtin (code);
38333 }
38334
38335 /* Returns a decl of a function that implements scatter store with
38336 register type VECTYPE and index type INDEX_TYPE and SCALE.
38337 Return NULL_TREE if it is not available. */
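/* For example, a V8DF scatter with a DImode index selects
IX86_BUILTIN_SCATTERDIV8DF below, while an SImode index selects
IX86_BUILTIN_SCATTERALTSIV8DF. */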
38338
38339 static tree
38340 ix86_vectorize_builtin_scatter (const_tree vectype,
38341 const_tree index_type, int scale)
38342 {
38343 bool si;
38344 enum ix86_builtins code;
38345
38346 if (!TARGET_AVX512F)
38347 return NULL_TREE;
38348
38349 if ((TREE_CODE (index_type) != INTEGER_TYPE
38350 && !POINTER_TYPE_P (index_type))
38351 || (TYPE_MODE (index_type) != SImode
38352 && TYPE_MODE (index_type) != DImode))
38353 return NULL_TREE;
38354
38355 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
38356 return NULL_TREE;
38357
38358 /* v*scatter* insn sign extends index to pointer mode. */
38359 if (TYPE_PRECISION (index_type) < POINTER_SIZE
38360 && TYPE_UNSIGNED (index_type))
38361 return NULL_TREE;
38362
38363 /* Scale can be 1, 2, 4 or 8. */
38364 if (scale <= 0
38365 || scale > 8
38366 || (scale & (scale - 1)) != 0)
38367 return NULL_TREE;
38368
38369 si = TYPE_MODE (index_type) == SImode;
38370 switch (TYPE_MODE (vectype))
38371 {
38372 case E_V8DFmode:
38373 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
38374 break;
38375 case E_V8DImode:
38376 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
38377 break;
38378 case E_V16SFmode:
38379 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
38380 break;
38381 case E_V16SImode:
38382 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
38383 break;
38384 default:
38385 return NULL_TREE;
38386 }
38387
38388 return ix86_builtins[code];
38389 }
38390
38391 /* Return true if it is safe to use the rsqrt optabs to optimize
38392 1.0/sqrt. */
38393
38394 static bool
38395 use_rsqrt_p ()
38396 {
38397 return (TARGET_SSE_MATH
38398 && flag_finite_math_only
38399 && !flag_trapping_math
38400 && flag_unsafe_math_optimizations);
38401 }
38402
38403 /* Returns a decl of a target-specific builtin that implements the
38404 reciprocal of the function FNDECL, or NULL_TREE if not available. */
38405
38406 static tree
38407 ix86_builtin_reciprocal (tree fndecl)
38408 {
38409 switch (DECL_FUNCTION_CODE (fndecl))
38410 {
38411 /* Vectorized version of sqrt to rsqrt conversion. */
38412 case IX86_BUILTIN_SQRTPS_NR:
38413 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
38414
38415 case IX86_BUILTIN_SQRTPS_NR256:
38416 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
38417
38418 default:
38419 return NULL_TREE;
38420 }
38421 }
38422 \f
38423 /* Helper for avx_vpermilps256_operand et al. This is also used by
38424 the expansion functions to turn the parallel back into a mask.
38425 The return value is 0 for no match, and imm8 + 1 for a match. */
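/* Worked example for V4SFmode: the parallel (0 3 2 1) gives ipar = {0, 3, 2, 1},
so mask = 0 | (3 << 2) | (2 << 4) | (1 << 6) = 0x6c and the function
returns 0x6d (imm8 0x6c plus one). */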
38426
38427 int
38428 avx_vpermilp_parallel (rtx par, machine_mode mode)
38429 {
38430 unsigned i, nelt = GET_MODE_NUNITS (mode);
38431 unsigned mask = 0;
38432 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
38433
38434 if (XVECLEN (par, 0) != (int) nelt)
38435 return 0;
38436
38437 /* Validate that all of the elements are constants, and not totally
38438 out of range. Copy the data into an integral array to make the
38439 subsequent checks easier. */
38440 for (i = 0; i < nelt; ++i)
38441 {
38442 rtx er = XVECEXP (par, 0, i);
38443 unsigned HOST_WIDE_INT ei;
38444
38445 if (!CONST_INT_P (er))
38446 return 0;
38447 ei = INTVAL (er);
38448 if (ei >= nelt)
38449 return 0;
38450 ipar[i] = ei;
38451 }
38452
38453 switch (mode)
38454 {
38455 case E_V8DFmode:
38456 /* In the 512-bit DFmode case, we can only move elements within
38457 a 128-bit lane. First fill the second part of the mask,
38458 then fallthru. */
38459 for (i = 4; i < 6; ++i)
38460 {
38461 if (ipar[i] < 4 || ipar[i] >= 6)
38462 return 0;
38463 mask |= (ipar[i] - 4) << i;
38464 }
38465 for (i = 6; i < 8; ++i)
38466 {
38467 if (ipar[i] < 6)
38468 return 0;
38469 mask |= (ipar[i] - 6) << i;
38470 }
38471 /* FALLTHRU */
38472
38473 case E_V4DFmode:
38474 /* In the 256-bit DFmode case, we can only move elements within
38475 a 128-bit lane. */
38476 for (i = 0; i < 2; ++i)
38477 {
38478 if (ipar[i] >= 2)
38479 return 0;
38480 mask |= ipar[i] << i;
38481 }
38482 for (i = 2; i < 4; ++i)
38483 {
38484 if (ipar[i] < 2)
38485 return 0;
38486 mask |= (ipar[i] - 2) << i;
38487 }
38488 break;
38489
38490 case E_V16SFmode:
38491 /* In 512 bit SFmode case, permutation in the upper 256 bits
38492 must mirror the permutation in the lower 256-bits. */
38493 for (i = 0; i < 8; ++i)
38494 if (ipar[i] + 8 != ipar[i + 8])
38495 return 0;
38496 /* FALLTHRU */
38497
38498 case E_V8SFmode:
38499 /* In 256 bit SFmode case, we have full freedom of
38500 movement within the low 128-bit lane, but the high 128-bit
38501 lane must mirror the exact same pattern. */
38502 for (i = 0; i < 4; ++i)
38503 if (ipar[i] + 4 != ipar[i + 4])
38504 return 0;
38505 nelt = 4;
38506 /* FALLTHRU */
38507
38508 case E_V2DFmode:
38509 case E_V4SFmode:
38510 /* In the 128-bit case, we've full freedom in the placement of
38511 the elements from the source operand. */
38512 for (i = 0; i < nelt; ++i)
38513 mask |= ipar[i] << (i * (nelt / 2));
38514 break;
38515
38516 default:
38517 gcc_unreachable ();
38518 }
38519
38520 /* Make sure success has a non-zero value by adding one. */
38521 return mask + 1;
38522 }
38523
38524 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
38525 the expansion functions to turn the parallel back into a mask.
38526 The return value is 0 for no match, and imm8 + 1 for a match. */
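/* Worked example for V8SFmode: the parallel (4 5 6 7 12 13 14 15) selects
the high 128-bit half of each operand, so mask = 1 | (3 << 4) = 0x31 and
the function returns 0x32. */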
38527
38528 int
38529 avx_vperm2f128_parallel (rtx par, machine_mode mode)
38530 {
38531 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
38532 unsigned mask = 0;
38533 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
38534
38535 if (XVECLEN (par, 0) != (int) nelt)
38536 return 0;
38537
38538 /* Validate that all of the elements are constants, and not totally
38539 out of range. Copy the data into an integral array to make the
38540 subsequent checks easier. */
38541 for (i = 0; i < nelt; ++i)
38542 {
38543 rtx er = XVECEXP (par, 0, i);
38544 unsigned HOST_WIDE_INT ei;
38545
38546 if (!CONST_INT_P (er))
38547 return 0;
38548 ei = INTVAL (er);
38549 if (ei >= 2 * nelt)
38550 return 0;
38551 ipar[i] = ei;
38552 }
38553
38554 /* Validate that the halves of the permute are halves. */
38555 for (i = 0; i < nelt2 - 1; ++i)
38556 if (ipar[i] + 1 != ipar[i + 1])
38557 return 0;
38558 for (i = nelt2; i < nelt - 1; ++i)
38559 if (ipar[i] + 1 != ipar[i + 1])
38560 return 0;
38561
38562 /* Reconstruct the mask. */
38563 for (i = 0; i < 2; ++i)
38564 {
38565 unsigned e = ipar[i * nelt2];
38566 if (e % nelt2)
38567 return 0;
38568 e /= nelt2;
38569 mask |= e << (i * 4);
38570 }
38571
38572 /* Make sure success has a non-zero value by adding one. */
38573 return mask + 1;
38574 }
38575 \f
38576 /* Return a register priority for hard reg REGNO. */
38577 static int
38578 ix86_register_priority (int hard_regno)
38579 {
38580 /* ebp and r13 as the base always want a displacement, and r12 as the
38581 base always wants an index. So discourage their use in an
38582 address. */
38583 if (hard_regno == R12_REG || hard_regno == R13_REG)
38584 return 0;
38585 if (hard_regno == BP_REG)
38586 return 1;
38587 /* New x86-64 int registers result in bigger code size. Discourage
38588 them. */
38589 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
38590 return 2;
38591 /* New x86-64 SSE registers result in bigger code size. Discourage
38592 them. */
38593 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
38594 return 2;
38595 /* Usage of AX register results in smaller code. Prefer it. */
38596 if (hard_regno == AX_REG)
38597 return 4;
38598 return 3;
38599 }
38600
38601 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
38602
38603 Put float CONST_DOUBLE in the constant pool instead of fp regs.
38604 QImode must go into class Q_REGS.
38605 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
38606 movdf to do mem-to-mem moves through integer regs. */
38607
38608 static reg_class_t
38609 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
38610 {
38611 machine_mode mode = GET_MODE (x);
38612
38613 /* We're only allowed to return a subclass of CLASS. Many of the
38614 following checks fail for NO_REGS, so eliminate that early. */
38615 if (regclass == NO_REGS)
38616 return NO_REGS;
38617
38618 /* All classes can load zeros. */
38619 if (x == CONST0_RTX (mode))
38620 return regclass;
38621
38622 /* Force constants into memory if we are loading a (nonzero) constant into
38623 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
38624 instructions to load from a constant. */
38625 if (CONSTANT_P (x)
38626 && (MAYBE_MMX_CLASS_P (regclass)
38627 || MAYBE_SSE_CLASS_P (regclass)
38628 || MAYBE_MASK_CLASS_P (regclass)))
38629 return NO_REGS;
38630
38631 /* Floating-point constants need more complex checks. */
38632 if (CONST_DOUBLE_P (x))
38633 {
38634 /* General regs can load everything. */
38635 if (INTEGER_CLASS_P (regclass))
38636 return regclass;
38637
38638 /* Floats can load 0 and 1 plus some others. Note that we eliminated
38639 zero above. We only want to wind up preferring 80387 registers if
38640 we plan on doing computation with them. */
38641 if (IS_STACK_MODE (mode)
38642 && standard_80387_constant_p (x) > 0)
38643 {
38644 /* Limit class to FP regs. */
38645 if (FLOAT_CLASS_P (regclass))
38646 return FLOAT_REGS;
38647 else if (regclass == FP_TOP_SSE_REGS)
38648 return FP_TOP_REG;
38649 else if (regclass == FP_SECOND_SSE_REGS)
38650 return FP_SECOND_REG;
38651 }
38652
38653 return NO_REGS;
38654 }
38655
38656 /* Prefer SSE regs only, if we can use them for math. */
38657 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38658 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
38659
38660 /* Generally when we see PLUS here, it's the function invariant
38661 (plus soft-fp const_int). Which can only be computed into general
38662 regs. */
38663 if (GET_CODE (x) == PLUS)
38664 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
38665
38666 /* QImode constants are easy to load, but non-constant QImode data
38667 must go into Q_REGS. */
38668 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
38669 {
38670 if (Q_CLASS_P (regclass))
38671 return regclass;
38672 else if (reg_class_subset_p (Q_REGS, regclass))
38673 return Q_REGS;
38674 else
38675 return NO_REGS;
38676 }
38677
38678 return regclass;
38679 }
38680
38681 /* Discourage putting floating-point values in SSE registers unless
38682 SSE math is being used, and likewise for the 387 registers. */
38683 static reg_class_t
38684 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
38685 {
38686 machine_mode mode = GET_MODE (x);
38687
38688 /* Restrict the output reload class to the register bank that we are doing
38689 math on. If we would like not to return a subset of CLASS, reject this
38690 alternative: if reload cannot do this, it will still use its choice. */
38691 mode = GET_MODE (x);
38692 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38693 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
38694
38695 if (IS_STACK_MODE (mode))
38696 {
38697 if (regclass == FP_TOP_SSE_REGS)
38698 return FP_TOP_REG;
38699 else if (regclass == FP_SECOND_SSE_REGS)
38700 return FP_SECOND_REG;
38701 else
38702 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
38703 }
38704
38705 return regclass;
38706 }
38707
38708 static reg_class_t
38709 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
38710 machine_mode mode, secondary_reload_info *sri)
38711 {
38712 /* Double-word spills from general registers to non-offsettable memory
38713 references (zero-extended addresses) require special handling. */
38714 if (TARGET_64BIT
38715 && MEM_P (x)
38716 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
38717 && INTEGER_CLASS_P (rclass)
38718 && !offsettable_memref_p (x))
38719 {
38720 sri->icode = (in_p
38721 ? CODE_FOR_reload_noff_load
38722 : CODE_FOR_reload_noff_store);
38723 /* Add the cost of moving address to a temporary. */
38724 sri->extra_cost = 1;
38725
38726 return NO_REGS;
38727 }
38728
38729 /* QImode spills from non-QI registers require an intermediate
38730 register on 32-bit targets. */
38731 if (mode == QImode
38732 && ((!TARGET_64BIT && !in_p
38733 && INTEGER_CLASS_P (rclass)
38734 && MAYBE_NON_Q_CLASS_P (rclass))
38735 || (!TARGET_AVX512DQ
38736 && MAYBE_MASK_CLASS_P (rclass))))
38737 {
38738 int regno = true_regnum (x);
38739
38740 /* Return Q_REGS if the operand is in memory. */
38741 if (regno == -1)
38742 return Q_REGS;
38743
38744 return NO_REGS;
38745 }
38746
38747 /* This condition handles corner case where an expression involving
38748 pointers gets vectorized. We're trying to use the address of a
38749 stack slot as a vector initializer.
38750
38751 (set (reg:V2DI 74 [ vect_cst_.2 ])
38752 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
38753
38754 Eventually frame gets turned into sp+offset like this:
38755
38756 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38757 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38758 (const_int 392 [0x188]))))
38759
38760 That later gets turned into:
38761
38762 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38763 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38764 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
38765
38766 We'll have the following reload recorded:
38767
38768 Reload 0: reload_in (DI) =
38769 (plus:DI (reg/f:DI 7 sp)
38770 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
38771 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38772 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
38773 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
38774 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38775 reload_reg_rtx: (reg:V2DI 22 xmm1)
38776
38777 Which isn't going to work since SSE instructions can't handle scalar
38778 additions. Returning GENERAL_REGS forces the addition into integer
38779 register and reload can handle subsequent reloads without problems. */
38780
38781 if (in_p && GET_CODE (x) == PLUS
38782 && SSE_CLASS_P (rclass)
38783 && SCALAR_INT_MODE_P (mode))
38784 return GENERAL_REGS;
38785
38786 return NO_REGS;
38787 }
38788
38789 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
38790
38791 static bool
38792 ix86_class_likely_spilled_p (reg_class_t rclass)
38793 {
38794 switch (rclass)
38795 {
38796 case AREG:
38797 case DREG:
38798 case CREG:
38799 case BREG:
38800 case AD_REGS:
38801 case SIREG:
38802 case DIREG:
38803 case SSE_FIRST_REG:
38804 case FP_TOP_REG:
38805 case FP_SECOND_REG:
38806 case BND_REGS:
38807 return true;
38808
38809 default:
38810 break;
38811 }
38812
38813 return false;
38814 }
38815
38816 /* If we are copying between registers from different register sets
38817 (e.g. FP and integer), we may need a memory location.
38818
38819 The function can't work reliably when one of the CLASSES is a class
38820 containing registers from multiple sets. We avoid this by never combining
38821 different sets in a single alternative in the machine description.
38822 Ensure that this constraint holds to avoid unexpected surprises.
38823
38824 When STRICT is false, we are being called from REGISTER_MOVE_COST,
38825 so do not enforce these sanity checks.
38826
38827 To optimize register_move_cost performance, define inline variant. */
38828
38829 static inline bool
38830 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
38831 reg_class_t class2, int strict)
38832 {
38833 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
38834 return false;
38835
38836 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
38837 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
38838 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
38839 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
38840 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
38841 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
38842 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
38843 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
38844 {
38845 gcc_assert (!strict || lra_in_progress);
38846 return true;
38847 }
38848
38849 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
38850 return true;
38851
38852 /* Between mask and general, we have moves no larger than word size. */
38853 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
38854 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
38855 return true;
38856
38857 /* ??? This is a lie. We do have moves between mmx/general, and for
38858 mmx/sse2. But by saying we need secondary memory we discourage the
38859 register allocator from using the mmx registers unless needed. */
38860 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
38861 return true;
38862
38863 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38864 {
38865 /* SSE1 doesn't have any direct moves from other classes. */
38866 if (!TARGET_SSE2)
38867 return true;
38868
38869 /* If the target says that inter-unit moves are more expensive
38870 than moving through memory, then don't generate them. */
38871 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
38872 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
38873 return true;
38874
38875 /* Between SSE and general, we have moves no larger than word size. */
38876 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38877 return true;
38878 }
38879
38880 return false;
38881 }
38882
38883 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
38884
38885 static bool
38886 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
38887 reg_class_t class2)
38888 {
38889 return inline_secondary_memory_needed (mode, class1, class2, true);
38890 }
38891
38892 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
38893
38894 get_secondary_mem widens integral modes to BITS_PER_WORD.
38895 There is no need to emit a full 64-bit move on 64-bit targets
38896 for integral modes that can be moved using a 32-bit move. */
38897
38898 static machine_mode
38899 ix86_secondary_memory_needed_mode (machine_mode mode)
38900 {
38901 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
38902 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
38903 return mode;
38904 }
38905
38906 /* Implement the TARGET_CLASS_MAX_NREGS hook.
38907
38908 On the 80386, this is the size of MODE in words,
38909 except in the FP regs, where a single reg is always enough. */
38910
38911 static unsigned char
38912 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
38913 {
38914 if (MAYBE_INTEGER_CLASS_P (rclass))
38915 {
38916 if (mode == XFmode)
38917 return (TARGET_64BIT ? 2 : 3);
38918 else if (mode == XCmode)
38919 return (TARGET_64BIT ? 4 : 6);
38920 else
38921 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
38922 }
38923 else
38924 {
38925 if (COMPLEX_MODE_P (mode))
38926 return 2;
38927 else
38928 return 1;
38929 }
38930 }
38931
38932 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
38933
38934 static bool
38935 ix86_can_change_mode_class (machine_mode from, machine_mode to,
38936 reg_class_t regclass)
38937 {
38938 if (from == to)
38939 return true;
38940
38941 /* x87 registers can't do subreg at all, as all values are reformatted
38942 to extended precision. */
38943 if (MAYBE_FLOAT_CLASS_P (regclass))
38944 return false;
38945
38946 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
38947 {
38948 /* Vector registers do not support QI or HImode loads. If we don't
38949 disallow a change to these modes, reload will assume it's ok to
38950 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
38951 the vec_dupv4hi pattern. */
38952 if (GET_MODE_SIZE (from) < 4)
38953 return false;
38954 }
38955
38956 return true;
38957 }
38958
38959 /* Return index of MODE in the sse load/store tables. */
38960
38961 static inline int
38962 sse_store_index (machine_mode mode)
38963 {
38964 switch (GET_MODE_SIZE (mode))
38965 {
38966 case 4:
38967 return 0;
38968 case 8:
38969 return 1;
38970 case 16:
38971 return 2;
38972 case 32:
38973 return 3;
38974 case 64:
38975 return 4;
38976 default:
38977 return -1;
38978 }
38979 }
38980
38981 /* Return the cost of moving data of mode MODE between a
38982 register and memory. A value of 2 is the default; this cost is
38983 relative to those in `REGISTER_MOVE_COST'.
38984
38985 This function is used extensively by register_move_cost, which is used
38986 to build tables at startup, so it is made inline here.
38987 When IN is 2, return the maximum of the in and out move costs.
38988
38989 If moving between registers and memory is more expensive than
38990 between two registers, you should define this macro to express the
38991 relative cost.
38992
38993 Also model the increased cost of moving QImode registers in
38994 non-Q_REGS classes.
38995 */
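/* For example, with IN == 2 a DFmode value in FLOAT_REGS is charged
MAX (ix86_cost->fp_load[1], ix86_cost->fp_store[1]). */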
38996 static inline int
38997 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
38998 int in)
38999 {
39000 int cost;
39001 if (FLOAT_CLASS_P (regclass))
39002 {
39003 int index;
39004 switch (mode)
39005 {
39006 case E_SFmode:
39007 index = 0;
39008 break;
39009 case E_DFmode:
39010 index = 1;
39011 break;
39012 case E_XFmode:
39013 index = 2;
39014 break;
39015 default:
39016 return 100;
39017 }
39018 if (in == 2)
39019 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
39020 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
39021 }
39022 if (SSE_CLASS_P (regclass))
39023 {
39024 int index = sse_store_index (mode);
39025 if (index == -1)
39026 return 100;
39027 if (in == 2)
39028 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
39029 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
39030 }
39031 if (MMX_CLASS_P (regclass))
39032 {
39033 int index;
39034 switch (GET_MODE_SIZE (mode))
39035 {
39036 case 4:
39037 index = 0;
39038 break;
39039 case 8:
39040 index = 1;
39041 break;
39042 default:
39043 return 100;
39044 }
39045 if (in == 2)
39046 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
39047 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
39048 }
39049 switch (GET_MODE_SIZE (mode))
39050 {
39051 case 1:
39052 if (Q_CLASS_P (regclass) || TARGET_64BIT)
39053 {
39054 if (!in)
39055 return ix86_cost->int_store[0];
39056 if (TARGET_PARTIAL_REG_DEPENDENCY
39057 && optimize_function_for_speed_p (cfun))
39058 cost = ix86_cost->movzbl_load;
39059 else
39060 cost = ix86_cost->int_load[0];
39061 if (in == 2)
39062 return MAX (cost, ix86_cost->int_store[0]);
39063 return cost;
39064 }
39065 else
39066 {
39067 if (in == 2)
39068 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
39069 if (in)
39070 return ix86_cost->movzbl_load;
39071 else
39072 return ix86_cost->int_store[0] + 4;
39073 }
39074 break;
39075 case 2:
39076 if (in == 2)
39077 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
39078 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
39079 default:
39080 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
39081 if (mode == TFmode)
39082 mode = XFmode;
39083 if (in == 2)
39084 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
39085 else if (in)
39086 cost = ix86_cost->int_load[2];
39087 else
39088 cost = ix86_cost->int_store[2];
39089 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
39090 }
39091 }
39092
39093 static int
39094 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
39095 bool in)
39096 {
39097 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
39098 }
39099
39100
39101 /* Return the cost of moving data from a register in class CLASS1 to
39102 one in class CLASS2.
39103
39104 It is not required that the cost always equal 2 when FROM is the same as TO;
39105 on some machines it is expensive to move between registers if they are not
39106 general registers. */
39107
39108 static int
39109 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
39110 reg_class_t class2_i)
39111 {
39112 enum reg_class class1 = (enum reg_class) class1_i;
39113 enum reg_class class2 = (enum reg_class) class2_i;
39114
39115 /* In case we require secondary memory, compute cost of the store followed
39116 by load. In order to avoid bad register allocation choices, we need
39117 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
39118
39119 if (inline_secondary_memory_needed (mode, class1, class2, false))
39120 {
39121 int cost = 1;
39122
39123 cost += inline_memory_move_cost (mode, class1, 2);
39124 cost += inline_memory_move_cost (mode, class2, 2);
39125
39126 /* In case of copying from a general purpose register we may emit multiple
39127 stores followed by a single load, causing a memory size mismatch stall.
39128 Count this as an arbitrarily high cost of 20. */
39129 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
39130 && TARGET_MEMORY_MISMATCH_STALL
39131 && targetm.class_max_nregs (class1, mode)
39132 > targetm.class_max_nregs (class2, mode))
39133 cost += 20;
39134
39135 /* In the case of FP/MMX moves, the registers actually overlap, and we
39136 have to switch modes in order to treat them differently. */
39137 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
39138 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
39139 cost += 20;
39140
39141 return cost;
39142 }
39143
39144 /* Moves between SSE/MMX and integer unit are expensive. */
39145 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
39146 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
39147
39148 /* ??? By keeping returned value relatively high, we limit the number
39149 of moves between integer and MMX/SSE registers for all targets.
39150 Additionally, high value prevents problem with x86_modes_tieable_p(),
39151 where integer modes in MMX/SSE registers are not tieable
39152 because of missing QImode and HImode moves to, from or between
39153 MMX/SSE registers. */
39154 return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
39155 ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
39156
39157 if (MAYBE_FLOAT_CLASS_P (class1))
39158 return ix86_cost->fp_move;
39159 if (MAYBE_SSE_CLASS_P (class1))
39160 {
39161 if (GET_MODE_BITSIZE (mode) <= 128)
39162 return ix86_cost->xmm_move;
39163 if (GET_MODE_BITSIZE (mode) <= 256)
39164 return ix86_cost->ymm_move;
39165 return ix86_cost->zmm_move;
39166 }
39167 if (MAYBE_MMX_CLASS_P (class1))
39168 return ix86_cost->mmx_move;
39169 return 2;
39170 }
39171
39172 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
39173 words of a value of mode MODE but can be less for certain modes in
39174 special long registers.
39175
39176 Actually there are no two-word move instructions for consecutive
39177 registers. And only registers 0-3 may have byte move instructions
39178 applied to them. */
39179
39180 static unsigned int
39181 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
39182 {
39183 if (GENERAL_REGNO_P (regno))
39184 {
39185 if (mode == XFmode)
39186 return TARGET_64BIT ? 2 : 3;
39187 if (mode == XCmode)
39188 return TARGET_64BIT ? 4 : 6;
39189 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
39190 }
39191 if (COMPLEX_MODE_P (mode))
39192 return 2;
39193 if (mode == V64SFmode || mode == V64SImode)
39194 return 4;
39195 return 1;
39196 }
39197
39198 /* Implement TARGET_HARD_REGNO_MODE_OK. */
39199
39200 static bool
39201 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
39202 {
39203 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
39204 if (CC_REGNO_P (regno))
39205 return GET_MODE_CLASS (mode) == MODE_CC;
39206 if (GET_MODE_CLASS (mode) == MODE_CC
39207 || GET_MODE_CLASS (mode) == MODE_RANDOM
39208 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
39209 return false;
39210 if (STACK_REGNO_P (regno))
39211 return VALID_FP_MODE_P (mode);
39212 if (MASK_REGNO_P (regno))
39213 return (VALID_MASK_REG_MODE (mode)
39214 || (TARGET_AVX512BW
39215 && VALID_MASK_AVX512BW_MODE (mode)));
39216 if (BND_REGNO_P (regno))
39217 return VALID_BND_REG_MODE (mode);
39218 if (SSE_REGNO_P (regno))
39219 {
39220 /* We implement the move patterns for all vector modes into and
39221 out of SSE registers, even when no operation instructions
39222 are available. */
39223
39224 /* For AVX-512 we allow, regardless of regno:
39225 - XI mode
39226 - any of 512-bit wide vector mode
39227 - any scalar mode. */
39228 if (TARGET_AVX512F
39229 && (mode == XImode
39230 || VALID_AVX512F_REG_MODE (mode)
39231 || VALID_AVX512F_SCALAR_MODE (mode)))
39232 return true;
39233
39234 /* For AVX-5124FMAPS allow V64SFmode for special regnos. */
39235 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
39236 && MOD4_SSE_REGNO_P (regno)
39237 && mode == V64SFmode)
39238 return true;
39239
39240 /* For AVX-5124VNNIW allow V64SImode for special regnos. */
39241 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
39242 && MOD4_SSE_REGNO_P (regno)
39243 && mode == V64SImode)
39244 return true;
39245
39246 /* TODO check for QI/HI scalars. */
39247 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
39248 if (TARGET_AVX512VL
39249 && (mode == OImode
39250 || mode == TImode
39251 || VALID_AVX256_REG_MODE (mode)
39252 || VALID_AVX512VL_128_REG_MODE (mode)))
39253 return true;
39254
39255 /* xmm16-xmm31 are only available for AVX-512. */
39256 if (EXT_REX_SSE_REGNO_P (regno))
39257 return false;
39258
39259 /* OImode and AVX modes are available only when AVX is enabled. */
39260 return ((TARGET_AVX
39261 && VALID_AVX256_REG_OR_OI_MODE (mode))
39262 || VALID_SSE_REG_MODE (mode)
39263 || VALID_SSE2_REG_MODE (mode)
39264 || VALID_MMX_REG_MODE (mode)
39265 || VALID_MMX_REG_MODE_3DNOW (mode));
39266 }
39267 if (MMX_REGNO_P (regno))
39268 {
39269 /* We implement the move patterns for 3DNOW modes even in MMX mode,
39270 so if the register is available at all, then we can move data of
39271 the given mode into or out of it. */
39272 return (VALID_MMX_REG_MODE (mode)
39273 || VALID_MMX_REG_MODE_3DNOW (mode));
39274 }
39275
39276 if (mode == QImode)
39277 {
39278 /* Take care for QImode values - they can be in non-QI regs,
39279 but then they do cause partial register stalls. */
39280 if (ANY_QI_REGNO_P (regno))
39281 return true;
39282 if (!TARGET_PARTIAL_REG_STALL)
39283 return true;
39284 /* LRA checks if the hard register is OK for the given mode.
39285 QImode values can live in non-QI regs, so we allow all
39286 registers here. */
39287 if (lra_in_progress)
39288 return true;
39289 return !can_create_pseudo_p ();
39290 }
39291 /* We handle both integer and floats in the general purpose registers. */
39292 else if (VALID_INT_MODE_P (mode))
39293 return true;
39294 else if (VALID_FP_MODE_P (mode))
39295 return true;
39296 else if (VALID_DFP_MODE_P (mode))
39297 return true;
39298 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
39299 on to use that value in smaller contexts, this can easily force a
39300 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
39301 supporting DImode, allow it. */
39302 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
39303 return true;
39304
39305 return false;
39306 }
39307
39308 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
39309 saves SSE registers across calls is Win64 (thus no need to check the
39310 current ABI here), and with AVX enabled Win64 only guarantees that
39311 the low 16 bytes are saved. */
39312
39313 static bool
39314 ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
39315 {
39316 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
39317 }
39318
39319 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
39320 tieable integer mode. */
39321
39322 static bool
39323 ix86_tieable_integer_mode_p (machine_mode mode)
39324 {
39325 switch (mode)
39326 {
39327 case E_HImode:
39328 case E_SImode:
39329 return true;
39330
39331 case E_QImode:
39332 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
39333
39334 case E_DImode:
39335 return TARGET_64BIT;
39336
39337 default:
39338 return false;
39339 }
39340 }
39341
39342 /* Implement TARGET_MODES_TIEABLE_P.
39343
39344 Return true if MODE1 is accessible in a register that can hold MODE2
39345 without copying. That is, all register classes that can hold MODE2
39346 can also hold MODE1. */
39347
39348 static bool
39349 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
39350 {
39351 if (mode1 == mode2)
39352 return true;
39353
39354 if (ix86_tieable_integer_mode_p (mode1)
39355 && ix86_tieable_integer_mode_p (mode2))
39356 return true;
39357
39358 /* MODE2 being XFmode implies fp stack or general regs, which means we
39359 can tie any smaller floating point modes to it. Note that we do not
39360 tie this with TFmode. */
39361 if (mode2 == XFmode)
39362 return mode1 == SFmode || mode1 == DFmode;
39363
39364 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
39365 that we can tie it with SFmode. */
39366 if (mode2 == DFmode)
39367 return mode1 == SFmode;
39368
39369 /* If MODE2 is only appropriate for an SSE register, then tie with
39370 any other mode acceptable to SSE registers. */
39371 if (GET_MODE_SIZE (mode2) == 32
39372 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
39373 return (GET_MODE_SIZE (mode1) == 32
39374 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
39375 if (GET_MODE_SIZE (mode2) == 16
39376 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
39377 return (GET_MODE_SIZE (mode1) == 16
39378 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
39379
39380 /* If MODE2 is appropriate for an MMX register, then tie
39381 with any other mode acceptable to MMX registers. */
39382 if (GET_MODE_SIZE (mode2) == 8
39383 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
39384 return (GET_MODE_SIZE (mode1) == 8
39385 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
39386
39387 return false;
39388 }
39389
39390 /* Return the cost of moving between two registers of mode MODE. */
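/* For example, on a 32-bit target a DImode value takes the default case
of the switch below, UNITS stays at UNITS_PER_WORD (4), and the move is
charged COSTS_N_INSNS (CEIL (8, 4)) = COSTS_N_INSNS (2). */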
39391
39392 static int
39393 ix86_set_reg_reg_cost (machine_mode mode)
39394 {
39395 unsigned int units = UNITS_PER_WORD;
39396
39397 switch (GET_MODE_CLASS (mode))
39398 {
39399 default:
39400 break;
39401
39402 case MODE_CC:
39403 units = GET_MODE_SIZE (CCmode);
39404 break;
39405
39406 case MODE_FLOAT:
39407 if ((TARGET_SSE && mode == TFmode)
39408 || (TARGET_80387 && mode == XFmode)
39409 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
39410 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
39411 units = GET_MODE_SIZE (mode);
39412 break;
39413
39414 case MODE_COMPLEX_FLOAT:
39415 if ((TARGET_SSE && mode == TCmode)
39416 || (TARGET_80387 && mode == XCmode)
39417 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
39418 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
39419 units = GET_MODE_SIZE (mode);
39420 break;
39421
39422 case MODE_VECTOR_INT:
39423 case MODE_VECTOR_FLOAT:
39424 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
39425 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
39426 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
39427 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
39428 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
39429 units = GET_MODE_SIZE (mode);
39430 }
39431
39432 /* Return the cost of moving between two registers of mode MODE,
39433 assuming that the move will be in pieces of at most UNITS bytes. */
39434 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
39435 }
39436
39437 /* Return the cost of a vector operation in MODE, given that the scalar
39438 version has cost COST. If PARALLEL is true, assume that the CPU has
39439 more than one unit performing the operation. */
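/* For example, on a tuning that sets TARGET_AVX128_OPTIMAL a parallel
256-bit V4DF operation with scalar cost C is charged 2 * C, reflecting
the split into two 128-bit halves; with TARGET_SSE_SPLIT_REGS a 128-bit
operation is likewise charged 2 * C. */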
39440
39441 static int
39442 ix86_vec_cost (machine_mode mode, int cost, bool parallel)
39443 {
39444 if (!VECTOR_MODE_P (mode))
39445 return cost;
39446
39447 if (!parallel)
39448 return cost * GET_MODE_NUNITS (mode);
39449 if (GET_MODE_BITSIZE (mode) == 128
39450 && TARGET_SSE_SPLIT_REGS)
39451 return cost * 2;
39452 if (GET_MODE_BITSIZE (mode) > 128
39453 && TARGET_AVX128_OPTIMAL)
39454 return cost * GET_MODE_BITSIZE (mode) / 128;
39455 return cost;
39456 }
39457
39458 /* Return cost of multiplication in MODE. */
39459
39460 static int
39461 ix86_multiplication_cost (const struct processor_costs *cost,
39462 enum machine_mode mode)
39463 {
39464 machine_mode inner_mode = mode;
39465 if (VECTOR_MODE_P (mode))
39466 inner_mode = GET_MODE_INNER (mode);
39467
39468 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39469 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
39470 else if (X87_FLOAT_MODE_P (mode))
39471 return cost->fmul;
39472 else if (FLOAT_MODE_P (mode))
39473 return ix86_vec_cost (mode,
39474 inner_mode == DFmode
39475 ? cost->mulsd : cost->mulss, true);
39476 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39477 {
39478 /* V*QImode is emulated with 7-13 insns. */
39479 if (mode == V16QImode || mode == V32QImode)
39480 {
39481 int extra = 11;
39482 if (TARGET_XOP && mode == V16QImode)
39483 extra = 5;
39484 else if (TARGET_SSSE3)
39485 extra = 6;
39486 return ix86_vec_cost (mode,
39487 cost->mulss * 2 + cost->sse_op * extra,
39488 true);
39489 }
39490 /* V*DImode is emulated with 5-8 insns. */
39491 else if (mode == V2DImode || mode == V4DImode)
39492 {
39493 if (TARGET_XOP && mode == V2DImode)
39494 return ix86_vec_cost (mode,
39495 cost->mulss * 2 + cost->sse_op * 3,
39496 true);
39497 else
39498 return ix86_vec_cost (mode,
39499 cost->mulss * 3 + cost->sse_op * 5,
39500 true);
39501 }
39502 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
39503 insns, including two PMULUDQ. */
39504 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
39505 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5,
39506 true);
39507 else
39508 return ix86_vec_cost (mode, cost->mulss, true);
39509 }
39510 else
39511 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
39512 }
39513
39514 /* Return cost of division in MODE. */
39515
39516 static int
39517 ix86_division_cost (const struct processor_costs *cost,
39518 enum machine_mode mode)
39519 {
39520 machine_mode inner_mode = mode;
39521 if (VECTOR_MODE_P (mode))
39522 inner_mode = GET_MODE_INNER (mode);
39523
39524 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39525 return inner_mode == DFmode ? cost->divsd : cost->divss;
39526 else if (X87_FLOAT_MODE_P (mode))
39527 return cost->fdiv;
39528 else if (FLOAT_MODE_P (mode))
39529 return ix86_vec_cost (mode,
39530 inner_mode == DFmode ? cost->divsd : cost->divss,
39531 true);
39532 else
39533 return cost->divide[MODE_INDEX (mode)];
39534 }
39535
39536 /* Return cost of shift in MODE.
39537 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
39538 AND_IN_OP1 specifies whether op1 is the result of an AND, and
39539 SHIFT_AND_TRUNCATE whether op1 is the result of a subreg.
39540
39541 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
39542
39543 static int
39544 ix86_shift_rotate_cost (const struct processor_costs *cost,
39545 enum machine_mode mode, bool constant_op1,
39546 HOST_WIDE_INT op1_val,
39547 bool speed,
39548 bool and_in_op1,
39549 bool shift_and_truncate,
39550 bool *skip_op0, bool *skip_op1)
39551 {
39552 if (skip_op0)
39553 *skip_op0 = *skip_op1 = false;
39554 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39555 {
39556 /* V*QImode is emulated with 1-11 insns. */
39557 if (mode == V16QImode || mode == V32QImode)
39558 {
39559 int count = 11;
39560 if (TARGET_XOP && mode == V16QImode)
39561 {
39562 /* For XOP we use vpshab, which requires a broadcast of the
39563 value to the variable shift insn. For constants this
39564 means a V16Q const in mem; even when we can perform the
39565 shift with one insn, set the cost to prefer paddb. */
39566 if (constant_op1)
39567 {
39568 if (skip_op1)
39569 *skip_op1 = true;
39570 return ix86_vec_cost (mode,
39571 cost->sse_op
39572 + (speed
39573 ? 2
39574 : COSTS_N_BYTES
39575 (GET_MODE_UNIT_SIZE (mode))), true);
39576 }
39577 count = 3;
39578 }
39579 else if (TARGET_SSSE3)
39580 count = 7;
39581 return ix86_vec_cost (mode, cost->sse_op * count, true);
39582 }
39583 else
39584 return ix86_vec_cost (mode, cost->sse_op, true);
39585 }
39586 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39587 {
39588 if (constant_op1)
39589 {
39590 if (op1_val > 32)
39591 return cost->shift_const + COSTS_N_INSNS (2);
39592 else
39593 return cost->shift_const * 2;
39594 }
39595 else
39596 {
39597 if (and_in_op1)
39598 return cost->shift_var * 2;
39599 else
39600 return cost->shift_var * 6 + COSTS_N_INSNS (2);
39601 }
39602 }
39603 else
39604 {
39605 if (constant_op1)
39606 return cost->shift_const;
39607 else if (shift_and_truncate)
39608 {
39609 if (skip_op0)
39610 *skip_op0 = *skip_op1 = true;
39611 /* Return the cost after the shift-and-truncate operation. */
39612 return cost->shift_var;
39613 }
39614 else
39615 return cost->shift_var;
39616 }
39617 return cost->shift_const;
39618 }
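
/* A brief worked example of the scalar double-word path above: shifting a
   DImode value on a 32-bit target (GET_MODE_SIZE > UNITS_PER_WORD) by a
   known constant greater than 32 costs shift_const + COSTS_N_INSNS (2),
   while a constant shift of at most 32 costs shift_const * 2; a variable
   count whose value comes from an AND costs shift_var * 2, and otherwise
   shift_var * 6 + COSTS_N_INSNS (2).  */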
39619
39620 /* Compute a (partial) cost for rtx X. Return true if the complete
39621 cost has been computed, and false if subexpressions should be
39622 scanned. In either case, *TOTAL contains the cost result. */
39623
39624 static bool
39625 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
39626 int *total, bool speed)
39627 {
39628 rtx mask;
39629 enum rtx_code code = GET_CODE (x);
39630 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
39631 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
39632 int src_cost;
39633
39634 switch (code)
39635 {
39636 case SET:
39637 if (register_operand (SET_DEST (x), VOIDmode)
39638 && reg_or_0_operand (SET_SRC (x), VOIDmode))
39639 {
39640 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
39641 return true;
39642 }
39643
39644 if (register_operand (SET_SRC (x), VOIDmode))
39645 /* Avoid potentially incorrect high cost from rtx_costs
39646 for non-tieable SUBREGs. */
39647 src_cost = 0;
39648 else
39649 {
39650 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
39651
39652 if (CONSTANT_P (SET_SRC (x)))
39653 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
39654 a small value, possibly zero for cheap constants. */
39655 src_cost += COSTS_N_INSNS (1);
39656 }
39657
39658 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
39659 return true;
39660
39661 case CONST_INT:
39662 case CONST:
39663 case LABEL_REF:
39664 case SYMBOL_REF:
39665 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
39666 *total = 3;
39667 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
39668 *total = 2;
39669 else if (flag_pic && SYMBOLIC_CONST (x)
39670 && !(TARGET_64BIT
39671 && (GET_CODE (x) == LABEL_REF
39672 || (GET_CODE (x) == SYMBOL_REF
39673 && SYMBOL_REF_LOCAL_P (x))))
39674 /* Use 0 cost for CONST to improve its propagation. */
39675 && (TARGET_64BIT || GET_CODE (x) != CONST))
39676 *total = 1;
39677 else
39678 *total = 0;
39679 return true;
39680
39681 case CONST_DOUBLE:
39682 if (IS_STACK_MODE (mode))
39683 switch (standard_80387_constant_p (x))
39684 {
39685 case -1:
39686 case 0:
39687 break;
39688 case 1: /* 0.0 */
39689 *total = 1;
39690 return true;
39691 default: /* Other constants */
39692 *total = 2;
39693 return true;
39694 }
39695 /* FALLTHRU */
39696
39697 case CONST_VECTOR:
39698 switch (standard_sse_constant_p (x, mode))
39699 {
39700 case 0:
39701 break;
39702 case 1: /* 0: xor eliminates false dependency */
39703 *total = 0;
39704 return true;
39705 default: /* -1: cmp contains false dependency */
39706 *total = 1;
39707 return true;
39708 }
39709 /* FALLTHRU */
39710
39711 case CONST_WIDE_INT:
39712 /* Fall back to (MEM (SYMBOL_REF)), since that's where
39713 it'll probably end up. Add a penalty for size. */
39714 *total = (COSTS_N_INSNS (1)
39715 + (!TARGET_64BIT && flag_pic)
39716 + (GET_MODE_SIZE (mode) <= 4
39717 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
39718 return true;
39719
39720 case ZERO_EXTEND:
39721 /* The zero extension is often completely free on x86_64, so make
39722 it as cheap as possible. */
39723 if (TARGET_64BIT && mode == DImode
39724 && GET_MODE (XEXP (x, 0)) == SImode)
39725 *total = 1;
39726 else if (TARGET_ZERO_EXTEND_WITH_AND)
39727 *total = cost->add;
39728 else
39729 *total = cost->movzx;
39730 return false;
39731
39732 case SIGN_EXTEND:
39733 *total = cost->movsx;
39734 return false;
39735
39736 case ASHIFT:
39737 if (SCALAR_INT_MODE_P (mode)
39738 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
39739 && CONST_INT_P (XEXP (x, 1)))
39740 {
39741 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
39742 if (value == 1)
39743 {
39744 *total = cost->add;
39745 return false;
39746 }
39747 if ((value == 2 || value == 3)
39748 && cost->lea <= cost->shift_const)
39749 {
39750 *total = cost->lea;
39751 return false;
39752 }
39753 }
39754 /* FALLTHRU */
39755
39756 case ROTATE:
39757 case ASHIFTRT:
39758 case LSHIFTRT:
39759 case ROTATERT:
39760 bool skip_op0, skip_op1;
39761 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
39762 CONST_INT_P (XEXP (x, 1))
39763 ? INTVAL (XEXP (x, 1)) : -1,
39764 speed,
39765 GET_CODE (XEXP (x, 1)) == AND,
39766 SUBREG_P (XEXP (x, 1))
39767 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
39768 &skip_op0, &skip_op1);
39769 if (skip_op0 || skip_op1)
39770 {
39771 if (!skip_op0)
39772 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
39773 if (!skip_op1)
39774 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
39775 return true;
39776 }
39777 return false;
39778
39779 case FMA:
39780 {
39781 rtx sub;
39782
39783 gcc_assert (FLOAT_MODE_P (mode));
39784 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
39785
39786 *total = ix86_vec_cost (mode,
39787 mode == SFmode ? cost->fmass : cost->fmasd,
39788 true);
39789 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
39790
39791 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
39792 sub = XEXP (x, 0);
39793 if (GET_CODE (sub) == NEG)
39794 sub = XEXP (sub, 0);
39795 *total += rtx_cost (sub, mode, FMA, 0, speed);
39796
39797 sub = XEXP (x, 2);
39798 if (GET_CODE (sub) == NEG)
39799 sub = XEXP (sub, 0);
39800 *total += rtx_cost (sub, mode, FMA, 2, speed);
39801 return true;
39802 }
39803
39804 case MULT:
39805 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
39806 {
39807 rtx op0 = XEXP (x, 0);
39808 rtx op1 = XEXP (x, 1);
39809 int nbits;
39810 if (CONST_INT_P (XEXP (x, 1)))
39811 {
39812 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
39813 for (nbits = 0; value != 0; value &= value - 1)
39814 nbits++;
39815 }
39816 else
39817 /* This is arbitrary. */
39818 nbits = 7;
39819
39820 /* Compute costs correctly for widening multiplication. */
39821 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
39822 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
39823 == GET_MODE_SIZE (mode))
39824 {
39825 int is_mulwiden = 0;
39826 machine_mode inner_mode = GET_MODE (op0);
39827
39828 if (GET_CODE (op0) == GET_CODE (op1))
39829 is_mulwiden = 1, op1 = XEXP (op1, 0);
39830 else if (CONST_INT_P (op1))
39831 {
39832 if (GET_CODE (op0) == SIGN_EXTEND)
39833 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
39834 == INTVAL (op1);
39835 else
39836 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
39837 }
39838
39839 if (is_mulwiden)
39840 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
39841 }
39842
39843 *total = (cost->mult_init[MODE_INDEX (mode)]
39844 + nbits * cost->mult_bit
39845 + rtx_cost (op0, mode, outer_code, opno, speed)
39846 + rtx_cost (op1, mode, outer_code, opno, speed));
39847
39848 return true;
39849 }
39850 *total = ix86_multiplication_cost (cost, mode);
39851 return false;
39852
39853 case DIV:
39854 case UDIV:
39855 case MOD:
39856 case UMOD:
39857 *total = ix86_division_cost (cost, mode);
39858 return false;
39859
39860 case PLUS:
39861 if (GET_MODE_CLASS (mode) == MODE_INT
39862 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
39863 {
39864 if (GET_CODE (XEXP (x, 0)) == PLUS
39865 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
39866 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
39867 && CONSTANT_P (XEXP (x, 1)))
39868 {
39869 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
39870 if (val == 2 || val == 4 || val == 8)
39871 {
39872 *total = cost->lea;
39873 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
39874 outer_code, opno, speed);
39875 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
39876 outer_code, opno, speed);
39877 *total += rtx_cost (XEXP (x, 1), mode,
39878 outer_code, opno, speed);
39879 return true;
39880 }
39881 }
39882 else if (GET_CODE (XEXP (x, 0)) == MULT
39883 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
39884 {
39885 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
39886 if (val == 2 || val == 4 || val == 8)
39887 {
39888 *total = cost->lea;
39889 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39890 outer_code, opno, speed);
39891 *total += rtx_cost (XEXP (x, 1), mode,
39892 outer_code, opno, speed);
39893 return true;
39894 }
39895 }
39896 else if (GET_CODE (XEXP (x, 0)) == PLUS)
39897 {
39898 /* Add with carry, ignore the cost of adding a carry flag. */
39899 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
39900 *total = cost->add;
39901 else
39902 {
39903 *total = cost->lea;
39904 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39905 outer_code, opno, speed);
39906 }
39907
39908 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
39909 outer_code, opno, speed);
39910 *total += rtx_cost (XEXP (x, 1), mode,
39911 outer_code, opno, speed);
39912 return true;
39913 }
39914 }
39915 /* FALLTHRU */
39916
39917 case MINUS:
39918 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
39919 if (GET_MODE_CLASS (mode) == MODE_INT
39920 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
39921 && GET_CODE (XEXP (x, 0)) == MINUS
39922 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
39923 {
39924 *total = cost->add;
39925 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39926 outer_code, opno, speed);
39927 *total += rtx_cost (XEXP (x, 1), mode,
39928 outer_code, opno, speed);
39929 return true;
39930 }
39931
39932 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39933 {
39934 *total = cost->addss;
39935 return false;
39936 }
39937 else if (X87_FLOAT_MODE_P (mode))
39938 {
39939 *total = cost->fadd;
39940 return false;
39941 }
39942 else if (FLOAT_MODE_P (mode))
39943 {
39944 *total = ix86_vec_cost (mode, cost->addss, true);
39945 return false;
39946 }
39947 /* FALLTHRU */
39948
39949 case AND:
39950 case IOR:
39951 case XOR:
39952 if (GET_MODE_CLASS (mode) == MODE_INT
39953 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39954 {
39955 *total = (cost->add * 2
39956 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
39957 << (GET_MODE (XEXP (x, 0)) != DImode))
39958 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
39959 << (GET_MODE (XEXP (x, 1)) != DImode)));
39960 return true;
39961 }
39962 /* FALLTHRU */
39963
39964 case NEG:
39965 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39966 {
39967 *total = cost->sse_op;
39968 return false;
39969 }
39970 else if (X87_FLOAT_MODE_P (mode))
39971 {
39972 *total = cost->fchs;
39973 return false;
39974 }
39975 else if (FLOAT_MODE_P (mode))
39976 {
39977 *total = ix86_vec_cost (mode, cost->sse_op, true);
39978 return false;
39979 }
39980 /* FALLTHRU */
39981
39982 case NOT:
39983 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39984 *total = ix86_vec_cost (mode, cost->sse_op, true);
39985 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39986 *total = cost->add * 2;
39987 else
39988 *total = cost->add;
39989 return false;
39990
39991 case COMPARE:
39992 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
39993 && XEXP (XEXP (x, 0), 1) == const1_rtx
39994 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
39995 && XEXP (x, 1) == const0_rtx)
39996 {
39997 /* This kind of construct is implemented using test[bwl].
39998 Treat it as if we had an AND. */
39999 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
40000 *total = (cost->add
40001 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
40002 opno, speed)
40003 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
40004 return true;
40005 }
40006
40007 /* The embedded comparison operand is completely free. */
40008 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
40009 && XEXP (x, 1) == const0_rtx)
40010 *total = 0;
40011
40012 return false;
40013
40014 case FLOAT_EXTEND:
40015 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
40016 *total = 0;
40017 else
40018 *total = ix86_vec_cost (mode, cost->addss, true);
40019 return false;
40020
40021 case FLOAT_TRUNCATE:
40022 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
40023 *total = cost->fadd;
40024 else
40025 *total = ix86_vec_cost (mode, cost->addss, true);
40026 return false;
40027
40028 case ABS:
40029 /* SSE requires memory load for the constant operand. It may make
40030 sense to account for this. Of course the constant operand may or
40031 may not be reused. */
40032 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40033 *total = cost->sse_op;
40034 else if (X87_FLOAT_MODE_P (mode))
40035 *total = cost->fabs;
40036 else if (FLOAT_MODE_P (mode))
40037 *total = ix86_vec_cost (mode, cost->sse_op, true);
40038 return false;
40039
40040 case SQRT:
40041 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40042 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
40043 else if (X87_FLOAT_MODE_P (mode))
40044 *total = cost->fsqrt;
40045 else if (FLOAT_MODE_P (mode))
40046 *total = ix86_vec_cost (mode,
40047 mode == SFmode ? cost->sqrtss : cost->sqrtsd,
40048 true);
40049 return false;
40050
40051 case UNSPEC:
40052 if (XINT (x, 1) == UNSPEC_TP)
40053 *total = 0;
40054 return false;
40055
40056 case VEC_SELECT:
40057 case VEC_CONCAT:
40058 case VEC_DUPLICATE:
40059 /* ??? Assume all of these vector manipulation patterns are
40060 recognizable. In which case they all pretty much have the
40061 same cost. */
40062 *total = cost->sse_op;
40063 return true;
40064 case VEC_MERGE:
40065 mask = XEXP (x, 2);
40066 /* This is a masked instruction; assume the same cost
40067 as the non-masked variant. */
40068 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
40069 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
40070 else
40071 *total = cost->sse_op;
40072 return true;
40073
40074 default:
40075 return false;
40076 }
40077 }
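
/* A small worked example for the scalar MULT case above: for
   (mult:SI (reg:SI 100) (const_int 5)) the popcount loop gives nbits = 2,
   since 5 has two bits set, so the reported cost is
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the costs of the two
   operands; a non-constant multiplier instead uses the arbitrary nbits = 7.  */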
40078
40079 #if TARGET_MACHO
40080
40081 static int current_machopic_label_num;
40082
40083 /* Given a symbol name and its associated stub, write out the
40084 definition of the stub. */
40085
40086 void
40087 machopic_output_stub (FILE *file, const char *symb, const char *stub)
40088 {
40089 unsigned int length;
40090 char *binder_name, *symbol_name, lazy_ptr_name[32];
40091 int label = ++current_machopic_label_num;
40092
40093 /* For 64-bit we shouldn't get here. */
40094 gcc_assert (!TARGET_64BIT);
40095
40096 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
40097 symb = targetm.strip_name_encoding (symb);
40098
40099 length = strlen (stub);
40100 binder_name = XALLOCAVEC (char, length + 32);
40101 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
40102
40103 length = strlen (symb);
40104 symbol_name = XALLOCAVEC (char, length + 32);
40105 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
40106
40107 sprintf (lazy_ptr_name, "L%d$lz", label);
40108
40109 if (MACHOPIC_ATT_STUB)
40110 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
40111 else if (MACHOPIC_PURE)
40112 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
40113 else
40114 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
40115
40116 fprintf (file, "%s:\n", stub);
40117 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
40118
40119 if (MACHOPIC_ATT_STUB)
40120 {
40121 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
40122 }
40123 else if (MACHOPIC_PURE)
40124 {
40125 /* PIC stub. */
40126 /* 25-byte PIC stub using "CALL get_pc_thunk". */
40127 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
40128 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
40129 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
40130 label, lazy_ptr_name, label);
40131 fprintf (file, "\tjmp\t*%%ecx\n");
40132 }
40133 else
40134 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
40135
40136 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
40137 it needs no stub-binding-helper. */
40138 if (MACHOPIC_ATT_STUB)
40139 return;
40140
40141 fprintf (file, "%s:\n", binder_name);
40142
40143 if (MACHOPIC_PURE)
40144 {
40145 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
40146 fprintf (file, "\tpushl\t%%ecx\n");
40147 }
40148 else
40149 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
40150
40151 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
40152
40153 /* N.B. Keep the correspondence of these
40154 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
40155 old-pic/new-pic/non-pic stubs; altering this will break
40156 compatibility with existing dylibs. */
40157 if (MACHOPIC_PURE)
40158 {
40159 /* 25-byte PIC stub using "CALL get_pc_thunk". */
40160 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
40161 }
40162 else
40163 /* 16-byte -mdynamic-no-pic stub. */
40164 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
40165
40166 fprintf (file, "%s:\n", lazy_ptr_name);
40167 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
40168 fprintf (file, ASM_LONG "%s\n", binder_name);
40169 }
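
/* For reference, a rough sketch of what the MACHOPIC_PURE branch above emits
   for, say, label 7 (the concrete stub, binder and symbol names are produced
   by GEN_BINDER_NAME_FOR_STUB / GEN_SYMBOL_NAME_FOR_SYMBOL and are only
   illustrative here):

	<stub>:
		.indirect_symbol <symbol>
		call	<get_pc_thunk>		# emitted by output_set_got
	LPC$7:	movl	L7$lz-LPC$7(%ecx),%ecx
		jmp	*%ecx
	<binder>:
		lea	L7$lz-<binder>(%ecx),%ecx
		pushl	%ecx
		jmp	dyld_stub_binding_helper
	L7$lz:
		.indirect_symbol <symbol>
		.long	<binder>
*/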
40170 #endif /* TARGET_MACHO */
40171
40172 /* Order the registers for register allocator. */
40173
40174 void
40175 x86_order_regs_for_local_alloc (void)
40176 {
40177 int pos = 0;
40178 int i;
40179
40180 /* First allocate the local general purpose registers. */
40181 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
40182 if (GENERAL_REGNO_P (i) && call_used_regs[i])
40183 reg_alloc_order [pos++] = i;
40184
40185 /* Global general purpose registers. */
40186 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
40187 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
40188 reg_alloc_order [pos++] = i;
40189
40190 /* x87 registers come first in case we are doing FP math
40191 using them. */
40192 if (!TARGET_SSE_MATH)
40193 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
40194 reg_alloc_order [pos++] = i;
40195
40196 /* SSE registers. */
40197 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
40198 reg_alloc_order [pos++] = i;
40199 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
40200 reg_alloc_order [pos++] = i;
40201
40202 /* Extended REX SSE registers. */
40203 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
40204 reg_alloc_order [pos++] = i;
40205
40206 /* Mask register. */
40207 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
40208 reg_alloc_order [pos++] = i;
40209
40210 /* MPX bound registers. */
40211 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
40212 reg_alloc_order [pos++] = i;
40213
40214 /* x87 registers. */
40215 if (TARGET_SSE_MATH)
40216 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
40217 reg_alloc_order [pos++] = i;
40218
40219 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
40220 reg_alloc_order [pos++] = i;
40221
40222 /* Initialize the rest of the array, as we do not allocate some registers
40223 at all. */
40224 while (pos < FIRST_PSEUDO_REGISTER)
40225 reg_alloc_order [pos++] = 0;
40226 }
40227
40228 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
40229 in struct attribute_spec.handler. */
40230 static tree
40231 ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int,
40232 bool *no_add_attrs)
40233 {
40234 if (TREE_CODE (*node) != FUNCTION_TYPE
40235 && TREE_CODE (*node) != METHOD_TYPE
40236 && TREE_CODE (*node) != FIELD_DECL
40237 && TREE_CODE (*node) != TYPE_DECL)
40238 {
40239 warning (OPT_Wattributes, "%qE attribute only applies to functions",
40240 name);
40241 *no_add_attrs = true;
40242 return NULL_TREE;
40243 }
40244 if (TARGET_64BIT)
40245 {
40246 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
40247 name);
40248 *no_add_attrs = true;
40249 return NULL_TREE;
40250 }
40251 if (is_attribute_p ("callee_pop_aggregate_return", name))
40252 {
40253 tree cst;
40254
40255 cst = TREE_VALUE (args);
40256 if (TREE_CODE (cst) != INTEGER_CST)
40257 {
40258 warning (OPT_Wattributes,
40259 "%qE attribute requires an integer constant argument",
40260 name);
40261 *no_add_attrs = true;
40262 }
40263 else if (compare_tree_int (cst, 0) != 0
40264 && compare_tree_int (cst, 1) != 0)
40265 {
40266 warning (OPT_Wattributes,
40267 "argument to %qE attribute is neither zero, nor one",
40268 name);
40269 *no_add_attrs = true;
40270 }
40271
40272 return NULL_TREE;
40273 }
40274
40275 return NULL_TREE;
40276 }
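
/* A use of the attribute that passes the checks above might look like this
   (illustrative, and only accepted when compiling for 32-bit):

     struct big ret_in_mem (int) __attribute__ ((callee_pop_aggregate_return (1)));

   The argument must be the integer constant 0 or 1.  */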
40277
40278 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
40279 struct attribute_spec.handler. */
40280 static tree
40281 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
40282 bool *no_add_attrs)
40283 {
40284 if (TREE_CODE (*node) != FUNCTION_TYPE
40285 && TREE_CODE (*node) != METHOD_TYPE
40286 && TREE_CODE (*node) != FIELD_DECL
40287 && TREE_CODE (*node) != TYPE_DECL)
40288 {
40289 warning (OPT_Wattributes, "%qE attribute only applies to functions",
40290 name);
40291 *no_add_attrs = true;
40292 return NULL_TREE;
40293 }
40294
40295 /* Can combine regparm with all attributes but fastcall. */
40296 if (is_attribute_p ("ms_abi", name))
40297 {
40298 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
40299 {
40300 error ("ms_abi and sysv_abi attributes are not compatible");
40301 }
40302
40303 return NULL_TREE;
40304 }
40305 else if (is_attribute_p ("sysv_abi", name))
40306 {
40307 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
40308 {
40309 error ("ms_abi and sysv_abi attributes are not compatible");
40310 }
40311
40312 return NULL_TREE;
40313 }
40314
40315 return NULL_TREE;
40316 }
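
/* For illustration, the handler above accepts either attribute alone, e.g.

     void f (void) __attribute__ ((ms_abi));

   but reports an error for a declaration such as

     void g (void) __attribute__ ((ms_abi, sysv_abi));

   because the two ABIs are not compatible.  */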
40317
40318 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
40319 struct attribute_spec.handler. */
40320 static tree
40321 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
40322 bool *no_add_attrs)
40323 {
40324 tree *type = NULL;
40325 if (DECL_P (*node))
40326 {
40327 if (TREE_CODE (*node) == TYPE_DECL)
40328 type = &TREE_TYPE (*node);
40329 }
40330 else
40331 type = node;
40332
40333 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
40334 {
40335 warning (OPT_Wattributes, "%qE attribute ignored",
40336 name);
40337 *no_add_attrs = true;
40338 }
40339
40340 else if ((is_attribute_p ("ms_struct", name)
40341 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
40342 || ((is_attribute_p ("gcc_struct", name)
40343 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
40344 {
40345 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
40346 name);
40347 *no_add_attrs = true;
40348 }
40349
40350 return NULL_TREE;
40351 }
40352
40353 static tree
40354 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
40355 bool *no_add_attrs)
40356 {
40357 if (TREE_CODE (*node) != FUNCTION_DECL)
40358 {
40359 warning (OPT_Wattributes, "%qE attribute only applies to functions",
40360 name);
40361 *no_add_attrs = true;
40362 }
40363 return NULL_TREE;
40364 }
40365
40366 static tree
40367 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
40368 int, bool *)
40369 {
40370 return NULL_TREE;
40371 }
40372
40373 static tree
40374 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
40375 {
40376 /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet,
40377 but the function type contains args and return type data. */
40378 tree func_type = *node;
40379 tree return_type = TREE_TYPE (func_type);
40380
40381 int nargs = 0;
40382 tree current_arg_type = TYPE_ARG_TYPES (func_type);
40383 while (current_arg_type
40384 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
40385 {
40386 if (nargs == 0)
40387 {
40388 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
40389 error ("interrupt service routine should have a pointer "
40390 "as the first argument");
40391 }
40392 else if (nargs == 1)
40393 {
40394 if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
40395 || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
40396 error ("interrupt service routine should have unsigned %s"
40397 "int as the second argument",
40398 TARGET_64BIT
40399 ? (TARGET_X32 ? "long long " : "long ")
40400 : "");
40401 }
40402 nargs++;
40403 current_arg_type = TREE_CHAIN (current_arg_type);
40404 }
40405 if (!nargs || nargs > 2)
40406 error ("interrupt service routine can only have a pointer argument "
40407 "and an optional integer argument");
40408 if (! VOID_TYPE_P (return_type))
40409 error ("interrupt service routine cannot have a non-void return value");
40410
40411 return NULL_TREE;
40412 }
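
/* A declaration that satisfies the checks above might look like this
   (illustrative; "uword_t" stands for an unsigned integer type whose mode
   is word_mode, e.g. a 64-bit type on x86-64):

     struct interrupt_frame;
     __attribute__ ((interrupt))
     void handler (struct interrupt_frame *frame, uword_t error_code);

   The second argument is optional, at most two arguments are allowed, and
   the return type must be void.  */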
40413
40414 static bool
40415 ix86_ms_bitfield_layout_p (const_tree record_type)
40416 {
40417 return ((TARGET_MS_BITFIELD_LAYOUT
40418 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
40419 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
40420 }
40421
40422 /* Returns an expression indicating where the this parameter is
40423 located on entry to the FUNCTION. */
40424
40425 static rtx
40426 x86_this_parameter (tree function)
40427 {
40428 tree type = TREE_TYPE (function);
40429 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
40430 int nregs;
40431
40432 if (TARGET_64BIT)
40433 {
40434 const int *parm_regs;
40435
40436 if (ix86_function_type_abi (type) == MS_ABI)
40437 parm_regs = x86_64_ms_abi_int_parameter_registers;
40438 else
40439 parm_regs = x86_64_int_parameter_registers;
40440 return gen_rtx_REG (Pmode, parm_regs[aggr]);
40441 }
40442
40443 nregs = ix86_function_regparm (type, function);
40444
40445 if (nregs > 0 && !stdarg_p (type))
40446 {
40447 int regno;
40448 unsigned int ccvt = ix86_get_callcvt (type);
40449
40450 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
40451 regno = aggr ? DX_REG : CX_REG;
40452 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
40453 {
40454 regno = CX_REG;
40455 if (aggr)
40456 return gen_rtx_MEM (SImode,
40457 plus_constant (Pmode, stack_pointer_rtx, 4));
40458 }
40459 else
40460 {
40461 regno = AX_REG;
40462 if (aggr)
40463 {
40464 regno = DX_REG;
40465 if (nregs == 1)
40466 return gen_rtx_MEM (SImode,
40467 plus_constant (Pmode,
40468 stack_pointer_rtx, 4));
40469 }
40470 }
40471 return gen_rtx_REG (SImode, regno);
40472 }
40473
40474 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
40475 aggr ? 8 : 4));
40476 }
40477
40478 /* Determine whether x86_output_mi_thunk can succeed. */
40479
40480 static bool
40481 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
40482 const_tree function)
40483 {
40484 /* 64-bit can handle anything. */
40485 if (TARGET_64BIT)
40486 return true;
40487
40488 /* For 32-bit, everything's fine if we have one free register. */
40489 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
40490 return true;
40491
40492 /* Need a free register for vcall_offset. */
40493 if (vcall_offset)
40494 return false;
40495
40496 /* Need a free register for GOT references. */
40497 if (flag_pic && !targetm.binds_local_p (function))
40498 return false;
40499
40500 /* Otherwise ok. */
40501 return true;
40502 }
40503
40504 /* Output the assembler code for a thunk function. THUNK_DECL is the
40505 declaration for the thunk function itself, FUNCTION is the decl for
40506 the target function. DELTA is an immediate constant offset to be
40507 added to THIS. If VCALL_OFFSET is nonzero, the word at
40508 *(*this + vcall_offset) should be added to THIS. */
40509
40510 static void
40511 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
40512 HOST_WIDE_INT vcall_offset, tree function)
40513 {
40514 rtx this_param = x86_this_parameter (function);
40515 rtx this_reg, tmp, fnaddr;
40516 unsigned int tmp_regno;
40517 rtx_insn *insn;
40518
40519 if (TARGET_64BIT)
40520 tmp_regno = R10_REG;
40521 else
40522 {
40523 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
40524 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
40525 tmp_regno = AX_REG;
40526 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
40527 tmp_regno = DX_REG;
40528 else
40529 tmp_regno = CX_REG;
40530 }
40531
40532 emit_note (NOTE_INSN_PROLOGUE_END);
40533
40534 /* If CET branch protection is enabled, insert an ENDBR instruction. */
40535 if ((flag_cf_protection & CF_BRANCH) && TARGET_IBT)
40536 emit_insn (gen_nop_endbr ());
40537
40538 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
40539 pull it in now and let DELTA benefit. */
40540 if (REG_P (this_param))
40541 this_reg = this_param;
40542 else if (vcall_offset)
40543 {
40544 /* Put the this parameter into %eax. */
40545 this_reg = gen_rtx_REG (Pmode, AX_REG);
40546 emit_move_insn (this_reg, this_param);
40547 }
40548 else
40549 this_reg = NULL_RTX;
40550
40551 /* Adjust the this parameter by a fixed constant. */
40552 if (delta)
40553 {
40554 rtx delta_rtx = GEN_INT (delta);
40555 rtx delta_dst = this_reg ? this_reg : this_param;
40556
40557 if (TARGET_64BIT)
40558 {
40559 if (!x86_64_general_operand (delta_rtx, Pmode))
40560 {
40561 tmp = gen_rtx_REG (Pmode, tmp_regno);
40562 emit_move_insn (tmp, delta_rtx);
40563 delta_rtx = tmp;
40564 }
40565 }
40566
40567 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
40568 }
40569
40570 /* Adjust the this parameter by a value stored in the vtable. */
40571 if (vcall_offset)
40572 {
40573 rtx vcall_addr, vcall_mem, this_mem;
40574
40575 tmp = gen_rtx_REG (Pmode, tmp_regno);
40576
40577 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
40578 if (Pmode != ptr_mode)
40579 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
40580 emit_move_insn (tmp, this_mem);
40581
40582 /* Adjust the this parameter. */
40583 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
40584 if (TARGET_64BIT
40585 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
40586 {
40587 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
40588 emit_move_insn (tmp2, GEN_INT (vcall_offset));
40589 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
40590 }
40591
40592 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
40593 if (Pmode != ptr_mode)
40594 emit_insn (gen_addsi_1_zext (this_reg,
40595 gen_rtx_REG (ptr_mode,
40596 REGNO (this_reg)),
40597 vcall_mem));
40598 else
40599 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
40600 }
40601
40602 /* If necessary, drop THIS back to its stack slot. */
40603 if (this_reg && this_reg != this_param)
40604 emit_move_insn (this_param, this_reg);
40605
40606 fnaddr = XEXP (DECL_RTL (function), 0);
40607 if (TARGET_64BIT)
40608 {
40609 if (!flag_pic || targetm.binds_local_p (function)
40610 || TARGET_PECOFF)
40611 ;
40612 else
40613 {
40614 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
40615 tmp = gen_rtx_CONST (Pmode, tmp);
40616 fnaddr = gen_const_mem (Pmode, tmp);
40617 }
40618 }
40619 else
40620 {
40621 if (!flag_pic || targetm.binds_local_p (function))
40622 ;
40623 #if TARGET_MACHO
40624 else if (TARGET_MACHO)
40625 {
40626 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
40627 fnaddr = XEXP (fnaddr, 0);
40628 }
40629 #endif /* TARGET_MACHO */
40630 else
40631 {
40632 tmp = gen_rtx_REG (Pmode, CX_REG);
40633 output_set_got (tmp, NULL_RTX);
40634
40635 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
40636 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
40637 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
40638 fnaddr = gen_const_mem (Pmode, fnaddr);
40639 }
40640 }
40641
40642 /* Our sibling call patterns do not allow memories, because we have no
40643 predicate that can distinguish between frame and non-frame memory.
40644 For our purposes here, we can get away with (ab)using a jump pattern,
40645 because we're going to do no optimization. */
40646 if (MEM_P (fnaddr))
40647 {
40648 if (sibcall_insn_operand (fnaddr, word_mode))
40649 {
40650 fnaddr = XEXP (DECL_RTL (function), 0);
40651 tmp = gen_rtx_MEM (QImode, fnaddr);
40652 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
40653 tmp = emit_call_insn (tmp);
40654 SIBLING_CALL_P (tmp) = 1;
40655 }
40656 else
40657 emit_jump_insn (gen_indirect_jump (fnaddr));
40658 }
40659 else
40660 {
40661 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
40662 {
40663 // CM_LARGE_PIC always uses pseudo PIC register which is
40664 // uninitialized. Since FUNCTION is local and calling it
40665 // doesn't go through PLT, we use scratch register %r11 as
40666 // PIC register and initialize it here.
40667 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
40668 ix86_init_large_pic_reg (tmp_regno);
40669 fnaddr = legitimize_pic_address (fnaddr,
40670 gen_rtx_REG (Pmode, tmp_regno));
40671 }
40672
40673 if (!sibcall_insn_operand (fnaddr, word_mode))
40674 {
40675 tmp = gen_rtx_REG (word_mode, tmp_regno);
40676 if (GET_MODE (fnaddr) != word_mode)
40677 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
40678 emit_move_insn (tmp, fnaddr);
40679 fnaddr = tmp;
40680 }
40681
40682 tmp = gen_rtx_MEM (QImode, fnaddr);
40683 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
40684 tmp = emit_call_insn (tmp);
40685 SIBLING_CALL_P (tmp) = 1;
40686 }
40687 emit_barrier ();
40688
40689 /* Emit just enough of rest_of_compilation to get the insns emitted.
40690 Note that use_thunk calls assemble_start_function et al. */
40691 insn = get_insns ();
40692 shorten_branches (insn);
40693 final_start_function (insn, file, 1);
40694 final (insn, file, 1);
40695 final_end_function ();
40696 }
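
/* As a concrete illustration of the common 64-bit case above (locally bound
   FUNCTION, small DELTA, no VCALL_OFFSET), the emitted thunk reduces to
   something like:

	addq	$16, %rdi		# adjust THIS by DELTA
	jmp	<function>		# sibling call emitted above

   with %rdi replaced by the first MS-ABI parameter register (%rcx) when the
   function uses the MS calling convention.  */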
40697
40698 static void
40699 x86_file_start (void)
40700 {
40701 default_file_start ();
40702 if (TARGET_16BIT)
40703 fputs ("\t.code16gcc\n", asm_out_file);
40704 #if TARGET_MACHO
40705 darwin_file_start ();
40706 #endif
40707 if (X86_FILE_START_VERSION_DIRECTIVE)
40708 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
40709 if (X86_FILE_START_FLTUSED)
40710 fputs ("\t.global\t__fltused\n", asm_out_file);
40711 if (ix86_asm_dialect == ASM_INTEL)
40712 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
40713 }
40714
40715 int
40716 x86_field_alignment (tree type, int computed)
40717 {
40718 machine_mode mode;
40719
40720 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
40721 return computed;
40722 if (TARGET_IAMCU)
40723 return iamcu_alignment (type, computed);
40724 mode = TYPE_MODE (strip_array_types (type));
40725 if (mode == DFmode || mode == DCmode
40726 || GET_MODE_CLASS (mode) == MODE_INT
40727 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
40728 return MIN (32, computed);
40729 return computed;
40730 }
40731
40732 /* Print call to TARGET to FILE. */
40733
40734 static void
40735 x86_print_call_or_nop (FILE *file, const char *target)
40736 {
40737 if (flag_nop_mcount)
40738 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
40739 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
40740 else
40741 fprintf (file, "1:\tcall\t%s\n", target);
40742 }
40743
40744 /* Output assembler code to FILE to increment profiler label # LABELNO
40745 for profiling a function entry. */
40746 void
40747 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
40748 {
40749 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
40750 : MCOUNT_NAME);
40751 if (TARGET_64BIT)
40752 {
40753 #ifndef NO_PROFILE_COUNTERS
40754 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
40755 #endif
40756
40757 if (!TARGET_PECOFF && flag_pic)
40758 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
40759 else
40760 x86_print_call_or_nop (file, mcount_name);
40761 }
40762 else if (flag_pic)
40763 {
40764 #ifndef NO_PROFILE_COUNTERS
40765 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
40766 LPREFIX, labelno);
40767 #endif
40768 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
40769 }
40770 else
40771 {
40772 #ifndef NO_PROFILE_COUNTERS
40773 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
40774 LPREFIX, labelno);
40775 #endif
40776 x86_print_call_or_nop (file, mcount_name);
40777 }
40778
40779 if (flag_record_mcount)
40780 {
40781 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
40782 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
40783 fprintf (file, "\t.previous\n");
40784 }
40785 }
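
/* For orientation, on x86-64 without -fPIC the code above emits roughly
   (the counter label, its use, and the exact mcount symbol depend on the
   target headers; shown here only as an illustration):

	leaq	<counter>(%rip), %r11		# only without NO_PROFILE_COUNTERS
   1:	call	mcount
	.section __mcount_loc, "a",@progbits	# only with -mrecord-mcount
	.quad	1b
	.previous
*/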
40786
40787 /* We don't have exact information about the insn sizes, but we may assume
40788 quite safely that we are informed about all 1 byte insns and memory
40789 address sizes. This is enough to eliminate unnecessary padding in
40790 99% of cases. */
40791
40792 int
40793 ix86_min_insn_size (rtx_insn *insn)
40794 {
40795 int l = 0, len;
40796
40797 if (!INSN_P (insn) || !active_insn_p (insn))
40798 return 0;
40799
40800 /* Discard alignments we've emitted, and jump instructions. */
40801 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
40802 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
40803 return 0;
40804
40805 /* Important case - calls are always 5 bytes.
40806 It is common to have many calls in a row. */
40807 if (CALL_P (insn)
40808 && symbolic_reference_mentioned_p (PATTERN (insn))
40809 && !SIBLING_CALL_P (insn))
40810 return 5;
40811 len = get_attr_length (insn);
40812 if (len <= 1)
40813 return 1;
40814
40815 /* For normal instructions we rely on get_attr_length being exact,
40816 with a few exceptions. */
40817 if (!JUMP_P (insn))
40818 {
40819 enum attr_type type = get_attr_type (insn);
40820
40821 switch (type)
40822 {
40823 case TYPE_MULTI:
40824 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
40825 || asm_noperands (PATTERN (insn)) >= 0)
40826 return 0;
40827 break;
40828 case TYPE_OTHER:
40829 case TYPE_FCMP:
40830 break;
40831 default:
40832 /* Otherwise trust get_attr_length. */
40833 return len;
40834 }
40835
40836 l = get_attr_length_address (insn);
40837 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
40838 l = 4;
40839 }
40840 if (l)
40841 return 1+l;
40842 else
40843 return 2;
40844 }
40845
40846 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
40847
40848 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
40849 window. */
40850
40851 static void
40852 ix86_avoid_jump_mispredicts (void)
40853 {
40854 rtx_insn *insn, *start = get_insns ();
40855 int nbytes = 0, njumps = 0;
40856 bool isjump = false;
40857
40858 /* Look for all minimal intervals of instructions containing 4 jumps.
40859 The intervals are bounded by START and INSN. NBYTES is the total
40860 size of instructions in the interval including INSN and not including
40861 START. When NBYTES is smaller than 16 bytes, it is possible
40862 that the end of START and INSN ends up in the same 16-byte page.
40863
40864 The smallest offset in the page at which INSN can start is the case where
40865 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
40866 We add a p2align to the 16-byte window with max-skip 15 - NBYTES + sizeof (INSN).
40867
40868 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
40869 have to, since control transfer to its label(s) can be performed through other
40870 means; also, we estimate the minimum length of all asm stmts as 0. */
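/* For example, if such an interval spans nbytes = 12 bytes of which INSN
   itself accounts for 2, the pad emitted before INSN is 15 - 12 + 2 = 5
   bytes, enough to push INSN out of the 16-byte window shared with the
   preceding jumps.  */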
40871 for (insn = start; insn; insn = NEXT_INSN (insn))
40872 {
40873 int min_size;
40874
40875 if (LABEL_P (insn))
40876 {
40877 int align = label_to_alignment (insn);
40878 int max_skip = label_to_max_skip (insn);
40879
40880 if (max_skip > 15)
40881 max_skip = 15;
40882 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
40883 already in the current 16 byte page, because otherwise
40884 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
40885 bytes to reach 16 byte boundary. */
40886 if (align <= 0
40887 || (align <= 3 && max_skip != (1 << align) - 1))
40888 max_skip = 0;
40889 if (dump_file)
40890 fprintf (dump_file, "Label %i with max_skip %i\n",
40891 INSN_UID (insn), max_skip);
40892 if (max_skip)
40893 {
40894 while (nbytes + max_skip >= 16)
40895 {
40896 start = NEXT_INSN (start);
40897 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
40898 || CALL_P (start))
40899 njumps--, isjump = true;
40900 else
40901 isjump = false;
40902 nbytes -= ix86_min_insn_size (start);
40903 }
40904 }
40905 continue;
40906 }
40907
40908 min_size = ix86_min_insn_size (insn);
40909 nbytes += min_size;
40910 if (dump_file)
40911 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
40912 INSN_UID (insn), min_size);
40913 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
40914 || CALL_P (insn))
40915 njumps++;
40916 else
40917 continue;
40918
40919 while (njumps > 3)
40920 {
40921 start = NEXT_INSN (start);
40922 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
40923 || CALL_P (start))
40924 njumps--, isjump = true;
40925 else
40926 isjump = false;
40927 nbytes -= ix86_min_insn_size (start);
40928 }
40929 gcc_assert (njumps >= 0);
40930 if (dump_file)
40931 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
40932 INSN_UID (start), INSN_UID (insn), nbytes);
40933
40934 if (njumps == 3 && isjump && nbytes < 16)
40935 {
40936 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
40937
40938 if (dump_file)
40939 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
40940 INSN_UID (insn), padsize);
40941 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
40942 }
40943 }
40944 }
40945 #endif
40946
40947 /* AMD Athlon works faster
40948 when RET is not the destination of a conditional jump or directly preceded
40949 by another jump instruction. We avoid the penalty by inserting a NOP just
40950 before such RET instructions. */
40951 static void
40952 ix86_pad_returns (void)
40953 {
40954 edge e;
40955 edge_iterator ei;
40956
40957 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40958 {
40959 basic_block bb = e->src;
40960 rtx_insn *ret = BB_END (bb);
40961 rtx_insn *prev;
40962 bool replace = false;
40963
40964 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
40965 || optimize_bb_for_size_p (bb))
40966 continue;
40967 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
40968 if (active_insn_p (prev) || LABEL_P (prev))
40969 break;
40970 if (prev && LABEL_P (prev))
40971 {
40972 edge e;
40973 edge_iterator ei;
40974
40975 FOR_EACH_EDGE (e, ei, bb->preds)
40976 if (EDGE_FREQUENCY (e) && e->src->index >= 0
40977 && !(e->flags & EDGE_FALLTHRU))
40978 {
40979 replace = true;
40980 break;
40981 }
40982 }
40983 if (!replace)
40984 {
40985 prev = prev_active_insn (ret);
40986 if (prev
40987 && ((JUMP_P (prev) && any_condjump_p (prev))
40988 || CALL_P (prev)))
40989 replace = true;
40990 /* Empty functions get branch mispredict even when
40991 the jump destination is not visible to us. */
40992 if (!prev && !optimize_function_for_size_p (cfun))
40993 replace = true;
40994 }
40995 if (replace)
40996 {
40997 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
40998 delete_insn (ret);
40999 }
41000 }
41001 }
41002
41003 /* Count the minimum number of instructions in BB. Return 4 if the
41004 number of instructions >= 4. */
41005
41006 static int
41007 ix86_count_insn_bb (basic_block bb)
41008 {
41009 rtx_insn *insn;
41010 int insn_count = 0;
41011
41012 /* Count number of instructions in this block. Return 4 if the number
41013 of instructions >= 4. */
41014 FOR_BB_INSNS (bb, insn)
41015 {
41016 /* This only happens in exit blocks. */
41017 if (JUMP_P (insn)
41018 && ANY_RETURN_P (PATTERN (insn)))
41019 break;
41020
41021 if (NONDEBUG_INSN_P (insn)
41022 && GET_CODE (PATTERN (insn)) != USE
41023 && GET_CODE (PATTERN (insn)) != CLOBBER)
41024 {
41025 insn_count++;
41026 if (insn_count >= 4)
41027 return insn_count;
41028 }
41029 }
41030
41031 return insn_count;
41032 }
41033
41034
41035 /* Count the minimum number of instructions in code path in BB.
41036 Return 4 if the number of instructions >= 4. */
41037
41038 static int
41039 ix86_count_insn (basic_block bb)
41040 {
41041 edge e;
41042 edge_iterator ei;
41043 int min_prev_count;
41044
41045 /* Only bother counting instructions along paths with no
41046 more than 2 basic blocks between entry and exit. Given
41047 that BB has an edge to exit, determine if a predecessor
41048 of BB has an edge from entry. If so, compute the number
41049 of instructions in the predecessor block. If there
41050 happen to be multiple such blocks, compute the minimum. */
41051 min_prev_count = 4;
41052 FOR_EACH_EDGE (e, ei, bb->preds)
41053 {
41054 edge prev_e;
41055 edge_iterator prev_ei;
41056
41057 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
41058 {
41059 min_prev_count = 0;
41060 break;
41061 }
41062 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
41063 {
41064 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
41065 {
41066 int count = ix86_count_insn_bb (e->src);
41067 if (count < min_prev_count)
41068 min_prev_count = count;
41069 break;
41070 }
41071 }
41072 }
41073
41074 if (min_prev_count < 4)
41075 min_prev_count += ix86_count_insn_bb (bb);
41076
41077 return min_prev_count;
41078 }
41079
41080 /* Pad short function to 4 instructions. */
41081
41082 static void
41083 ix86_pad_short_function (void)
41084 {
41085 edge e;
41086 edge_iterator ei;
41087
41088 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
41089 {
41090 rtx_insn *ret = BB_END (e->src);
41091 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
41092 {
41093 int insn_count = ix86_count_insn (e->src);
41094
41095 /* Pad short function. */
41096 if (insn_count < 4)
41097 {
41098 rtx_insn *insn = ret;
41099
41100 /* Find epilogue. */
41101 while (insn
41102 && (!NOTE_P (insn)
41103 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
41104 insn = PREV_INSN (insn);
41105
41106 if (!insn)
41107 insn = ret;
41108
41109 /* Two NOPs count as one instruction. */
41110 insn_count = 2 * (4 - insn_count);
41111 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
41112 }
41113 }
41114 }
41115 }
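
/* A quick example of the NOP arithmetic above: a path containing a single
   instruction gets insn_count = 2 * (4 - 1) = 6, i.e. six NOPs, which per
   the "two NOPs count as one instruction" rule amounts to three
   instructions and brings the padded path up to the required four.  */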
41116
41117 /* Fix up a Windows system unwinder issue. If an EH region falls through into
41118 the epilogue, the Windows system unwinder will apply epilogue logic and
41119 produce incorrect offsets. This can be avoided by adding a nop between
41120 the last insn that can throw and the first insn of the epilogue. */
41121
41122 static void
41123 ix86_seh_fixup_eh_fallthru (void)
41124 {
41125 edge e;
41126 edge_iterator ei;
41127
41128 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
41129 {
41130 rtx_insn *insn, *next;
41131
41132 /* Find the beginning of the epilogue. */
41133 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
41134 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
41135 break;
41136 if (insn == NULL)
41137 continue;
41138
41139 /* We only care about preceding insns that can throw. */
41140 insn = prev_active_insn (insn);
41141 if (insn == NULL || !can_throw_internal (insn))
41142 continue;
41143
41144 /* Do not separate calls from their debug information. */
41145 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
41146 if (NOTE_P (next)
41147 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
41148 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
41149 insn = next;
41150 else
41151 break;
41152
41153 emit_insn_after (gen_nops (const1_rtx), insn);
41154 }
41155 }
41156
41157 /* Given a register number BASE, the lowest of a group of registers, update
41158 regsets IN and OUT with the registers that should be avoided in input
41159 and output operands respectively when trying to avoid generating a modr/m
41160 byte for -fmitigate-rop. */
41161
41162 static void
41163 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
41164 {
41165 SET_HARD_REG_BIT (out, base);
41166 SET_HARD_REG_BIT (out, base + 1);
41167 SET_HARD_REG_BIT (in, base + 2);
41168 SET_HARD_REG_BIT (in, base + 3);
41169 }
41170
41171 /* Called if -fmitigate-rop is in effect. Try to rewrite instructions so
41172 that certain encodings of modr/m bytes do not occur. */
41173 static void
41174 ix86_mitigate_rop (void)
41175 {
41176 HARD_REG_SET input_risky;
41177 HARD_REG_SET output_risky;
41178 HARD_REG_SET inout_risky;
41179
41180 CLEAR_HARD_REG_SET (output_risky);
41181 CLEAR_HARD_REG_SET (input_risky);
41182 SET_HARD_REG_BIT (output_risky, AX_REG);
41183 SET_HARD_REG_BIT (output_risky, CX_REG);
41184 SET_HARD_REG_BIT (input_risky, BX_REG);
41185 SET_HARD_REG_BIT (input_risky, DX_REG);
41186 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
41187 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
41188 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
41189 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
41190 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
41191 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
41192 COPY_HARD_REG_SET (inout_risky, input_risky);
41193 IOR_HARD_REG_SET (inout_risky, output_risky);
41194
41195 df_note_add_problem ();
41196 /* Fix up what stack-regs did. */
41197 df_insn_rescan_all ();
41198 df_analyze ();
41199
41200 regrename_init (true);
41201 regrename_analyze (NULL);
41202
41203 auto_vec<du_head_p> cands;
41204
41205 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
41206 {
41207 if (!NONDEBUG_INSN_P (insn))
41208 continue;
41209
41210 if (GET_CODE (PATTERN (insn)) == USE
41211 || GET_CODE (PATTERN (insn)) == CLOBBER)
41212 continue;
41213
41214 extract_insn (insn);
41215
41216 int opno0, opno1;
41217 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
41218 recog_data.n_operands, &opno0,
41219 &opno1);
41220
41221 if (!ix86_rop_should_change_byte_p (modrm))
41222 continue;
41223
41224 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
41225
41226 /* This happens when regrename has to fail a block. */
41227 if (!info->op_info)
41228 continue;
41229
41230 if (info->op_info[opno0].n_chains != 0)
41231 {
41232 gcc_assert (info->op_info[opno0].n_chains == 1);
41233 du_head_p op0c;
41234 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
41235 if (op0c->target_data_1 + op0c->target_data_2 == 0
41236 && !op0c->cannot_rename)
41237 cands.safe_push (op0c);
41238
41239 op0c->target_data_1++;
41240 }
41241 if (info->op_info[opno1].n_chains != 0)
41242 {
41243 gcc_assert (info->op_info[opno1].n_chains == 1);
41244 du_head_p op1c;
41245 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
41246 if (op1c->target_data_1 + op1c->target_data_2 == 0
41247 && !op1c->cannot_rename)
41248 cands.safe_push (op1c);
41249
41250 op1c->target_data_2++;
41251 }
41252 }
41253
41254 int i;
41255 du_head_p head;
41256 FOR_EACH_VEC_ELT (cands, i, head)
41257 {
41258 int old_reg, best_reg;
41259 HARD_REG_SET unavailable;
41260
41261 CLEAR_HARD_REG_SET (unavailable);
41262 if (head->target_data_1)
41263 IOR_HARD_REG_SET (unavailable, output_risky);
41264 if (head->target_data_2)
41265 IOR_HARD_REG_SET (unavailable, input_risky);
41266
41267 int n_uses;
41268 reg_class superclass = regrename_find_superclass (head, &n_uses,
41269 &unavailable);
41270 old_reg = head->regno;
41271 best_reg = find_rename_reg (head, superclass, &unavailable,
41272 old_reg, false);
41273 bool ok = regrename_do_replace (head, best_reg);
41274 gcc_assert (ok);
41275 if (dump_file)
41276 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
41277 reg_names[best_reg], reg_class_names[superclass]);
41278
41279 }
41280
41281 regrename_finish ();
41282
41283 df_analyze ();
41284
41285 basic_block bb;
41286 regset_head live;
41287
41288 INIT_REG_SET (&live);
41289
41290 FOR_EACH_BB_FN (bb, cfun)
41291 {
41292 rtx_insn *insn;
41293
41294 COPY_REG_SET (&live, DF_LR_OUT (bb));
41295 df_simulate_initialize_backwards (bb, &live);
41296
41297 FOR_BB_INSNS_REVERSE (bb, insn)
41298 {
41299 if (!NONDEBUG_INSN_P (insn))
41300 continue;
41301
41302 df_simulate_one_insn_backwards (bb, insn, &live);
41303
41304 if (GET_CODE (PATTERN (insn)) == USE
41305 || GET_CODE (PATTERN (insn)) == CLOBBER)
41306 continue;
41307
41308 extract_insn (insn);
41309 constrain_operands_cached (insn, reload_completed);
41310 int opno0, opno1;
41311 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
41312 recog_data.n_operands, &opno0,
41313 &opno1);
41314 if (modrm < 0
41315 || !ix86_rop_should_change_byte_p (modrm)
41316 || opno0 == opno1)
41317 continue;
41318
41319 rtx oldreg = recog_data.operand[opno1];
41320 preprocess_constraints (insn);
41321 const operand_alternative *alt = which_op_alt ();
41322
41323 int i;
41324 for (i = 0; i < recog_data.n_operands; i++)
41325 if (i != opno1
41326 && alt[i].earlyclobber
41327 && reg_overlap_mentioned_p (recog_data.operand[i],
41328 oldreg))
41329 break;
41330
41331 if (i < recog_data.n_operands)
41332 continue;
41333
41334 if (dump_file)
41335 fprintf (dump_file,
41336 "attempting to fix modrm byte in insn %d:"
41337 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
41338 reg_class_names[alt[opno1].cl]);
41339
41340 HARD_REG_SET unavailable;
41341 REG_SET_TO_HARD_REG_SET (unavailable, &live);
41342 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
41343 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
41344 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
41345 IOR_HARD_REG_SET (unavailable, output_risky);
41346 IOR_COMPL_HARD_REG_SET (unavailable,
41347 reg_class_contents[alt[opno1].cl]);
41348
41349 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41350 if (!TEST_HARD_REG_BIT (unavailable, i))
41351 break;
41352 if (i == FIRST_PSEUDO_REGISTER)
41353 {
41354 if (dump_file)
41355 fprintf (dump_file, ", none available\n");
41356 continue;
41357 }
41358 if (dump_file)
41359 fprintf (dump_file, " -> %d\n", i);
41360 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
41361 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
41362 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
41363 }
41364 }
41365 }
41366
41367 /* Implement machine specific optimizations. We implement padding of returns
41368 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
41369 static void
41370 ix86_reorg (void)
41371 {
41372 /* We are freeing block_for_insn in the toplev to keep compatibility
41373 with old MDEP_REORGS that are not CFG based. Recompute it now. */
41374 compute_bb_for_insn ();
41375
41376 if (flag_mitigate_rop)
41377 ix86_mitigate_rop ();
41378
41379 if (TARGET_SEH && current_function_has_exception_handlers ())
41380 ix86_seh_fixup_eh_fallthru ();
41381
41382 if (optimize && optimize_function_for_speed_p (cfun))
41383 {
41384 if (TARGET_PAD_SHORT_FUNCTION)
41385 ix86_pad_short_function ();
41386 else if (TARGET_PAD_RETURNS)
41387 ix86_pad_returns ();
41388 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
41389 if (TARGET_FOUR_JUMP_LIMIT)
41390 ix86_avoid_jump_mispredicts ();
41391 #endif
41392 }
41393 }
41394
41395 /* Return nonzero when a QImode register that must be represented via a REX
41396 prefix is used. */
41397 bool
41398 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
41399 {
41400 int i;
41401 extract_insn_cached (insn);
41402 for (i = 0; i < recog_data.n_operands; i++)
41403 if (GENERAL_REG_P (recog_data.operand[i])
41404 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
41405 return true;
41406 return false;
41407 }
41408
41409 /* Return true when INSN mentions a register that must be encoded using a
41410 REX prefix. */
41411 bool
41412 x86_extended_reg_mentioned_p (rtx insn)
41413 {
41414 subrtx_iterator::array_type array;
41415 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
41416 {
41417 const_rtx x = *iter;
41418 if (REG_P (x)
41419 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
41420 return true;
41421 }
41422 return false;
41423 }
41424
41425 /* If profitable, negate (without causing overflow) integer constant
41426 of mode MODE at location LOC. Return true in this case. */
41427 bool
41428 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
41429 {
41430 HOST_WIDE_INT val;
41431
41432 if (!CONST_INT_P (*loc))
41433 return false;
41434
41435 switch (mode)
41436 {
41437 case E_DImode:
41438 /* DImode x86_64 constants must fit in 32 bits. */
41439 gcc_assert (x86_64_immediate_operand (*loc, mode));
41440
41441 mode = SImode;
41442 break;
41443
41444 case E_SImode:
41445 case E_HImode:
41446 case E_QImode:
41447 break;
41448
41449 default:
41450 gcc_unreachable ();
41451 }
41452
41453 /* Avoid overflows. */
41454 if (mode_signbit_p (mode, *loc))
41455 return false;
41456
41457 val = INTVAL (*loc);
41458
41459 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
41460 Exception: -128 encodes in fewer bytes than 128, so swap the sign and the op. */
41461 if ((val < 0 && val != -128)
41462 || val == 128)
41463 {
41464 *loc = GEN_INT (-val);
41465 return true;
41466 }
41467
41468 return false;
41469 }
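
/* Illustrative example of the payoff (encoding facts only, not compiler code):
   the sign-extended 8-bit immediate range is -128..127, so

     addl $128, %eax      -> 5 bytes (needs a 32-bit immediate)
     subl $-128, %eax     -> 3 bytes (fits an 8-bit immediate)

   which is why 128 is negated (and the operation swapped by the caller)
   while -128 is left alone.  */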
41470
41471 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
41472 optabs would emit if we didn't have TFmode patterns. */
41473
41474 void
41475 x86_emit_floatuns (rtx operands[2])
41476 {
41477 rtx_code_label *neglab, *donelab;
41478 rtx i0, i1, f0, in, out;
41479 machine_mode mode, inmode;
41480
41481 inmode = GET_MODE (operands[1]);
41482 gcc_assert (inmode == SImode || inmode == DImode);
41483
41484 out = operands[0];
41485 in = force_reg (inmode, operands[1]);
41486 mode = GET_MODE (out);
41487 neglab = gen_label_rtx ();
41488 donelab = gen_label_rtx ();
41489 f0 = gen_reg_rtx (mode);
41490
41491 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
41492
41493 expand_float (out, in, 0);
41494
41495 emit_jump_insn (gen_jump (donelab));
41496 emit_barrier ();
41497
41498 emit_label (neglab);
41499
41500 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
41501 1, OPTAB_DIRECT);
41502 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
41503 1, OPTAB_DIRECT);
41504 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
41505
41506 expand_float (f0, i0, 0);
41507
41508 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
41509
41510 emit_label (donelab);
41511 }
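
/* Illustrative sketch only: for a 64-bit unsigned input the sequence emitted
   above corresponds roughly to the following C (the low bit is OR-ed back in
   so that the halved value rounds the same way as the original; the function
   name is hypothetical):

     double u64_to_fp (unsigned long long u)
     {
       if ((long long) u >= 0)
         return (double) (long long) u;
       double d = (double) (long long) ((u >> 1) | (u & 1));
       return d + d;
     }
*/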
41512 \f
41513 static bool canonicalize_perm (struct expand_vec_perm_d *d);
41514 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
41515 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
41516 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
41517
41518 /* Get a vector mode of the same size as the original but with elements
41519 twice as wide. This is only guaranteed to apply to integral vectors. */
41520
41521 static inline machine_mode
41522 get_mode_wider_vector (machine_mode o)
41523 {
41524 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
41525 machine_mode n = GET_MODE_WIDER_MODE (o).require ();
41526 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
41527 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
41528 return n;
41529 }
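
/* For example, for V8HImode (eight 16-bit elements) this returns V4SImode
   (four 32-bit elements): the same 16-byte vector, elements twice as wide.  */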
41530
41531 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
41532 fill TARGET with VAL via vec_duplicate. */
41533
41534 static bool
41535 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
41536 {
41537 bool ok;
41538 rtx_insn *insn;
41539 rtx dup;
41540
41541 /* First attempt to recognize VAL as-is. */
41542 dup = gen_vec_duplicate (mode, val);
41543 insn = emit_insn (gen_rtx_SET (target, dup));
41544 if (recog_memoized (insn) < 0)
41545 {
41546 rtx_insn *seq;
41547 machine_mode innermode = GET_MODE_INNER (mode);
41548 rtx reg;
41549
41550 /* If that fails, force VAL into a register. */
41551
41552 start_sequence ();
41553 reg = force_reg (innermode, val);
41554 if (GET_MODE (reg) != innermode)
41555 reg = gen_lowpart (innermode, reg);
41556 SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
41557 seq = get_insns ();
41558 end_sequence ();
41559 if (seq)
41560 emit_insn_before (seq, insn);
41561
41562 ok = recog_memoized (insn) >= 0;
41563 gcc_assert (ok);
41564 }
41565 return true;
41566 }
41567
41568 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41569 with all elements equal to VAL. Return true if successful. */
41570
41571 static bool
41572 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
41573 rtx target, rtx val)
41574 {
41575 bool ok;
41576
41577 switch (mode)
41578 {
41579 case E_V2SImode:
41580 case E_V2SFmode:
41581 if (!mmx_ok)
41582 return false;
41583 /* FALLTHRU */
41584
41585 case E_V4DFmode:
41586 case E_V4DImode:
41587 case E_V8SFmode:
41588 case E_V8SImode:
41589 case E_V2DFmode:
41590 case E_V2DImode:
41591 case E_V4SFmode:
41592 case E_V4SImode:
41593 case E_V16SImode:
41594 case E_V8DImode:
41595 case E_V16SFmode:
41596 case E_V8DFmode:
41597 return ix86_vector_duplicate_value (mode, target, val);
41598
41599 case E_V4HImode:
41600 if (!mmx_ok)
41601 return false;
41602 if (TARGET_SSE || TARGET_3DNOW_A)
41603 {
41604 rtx x;
41605
41606 val = gen_lowpart (SImode, val);
41607 x = gen_rtx_TRUNCATE (HImode, val);
41608 x = gen_rtx_VEC_DUPLICATE (mode, x);
41609 emit_insn (gen_rtx_SET (target, x));
41610 return true;
41611 }
41612 goto widen;
41613
41614 case E_V8QImode:
41615 if (!mmx_ok)
41616 return false;
41617 goto widen;
41618
41619 case E_V8HImode:
41620 if (TARGET_AVX2)
41621 return ix86_vector_duplicate_value (mode, target, val);
41622
41623 if (TARGET_SSE2)
41624 {
41625 struct expand_vec_perm_d dperm;
41626 rtx tmp1, tmp2;
41627
41628 permute:
41629 memset (&dperm, 0, sizeof (dperm));
41630 dperm.target = target;
41631 dperm.vmode = mode;
41632 dperm.nelt = GET_MODE_NUNITS (mode);
41633 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
41634 dperm.one_operand_p = true;
41635
41636 /* Extend to SImode using a paradoxical SUBREG. */
41637 tmp1 = gen_reg_rtx (SImode);
41638 emit_move_insn (tmp1, gen_lowpart (SImode, val));
41639
41640 /* Insert the SImode value as low element of a V4SImode vector. */
41641 tmp2 = gen_reg_rtx (V4SImode);
41642 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
41643 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
41644
41645 ok = (expand_vec_perm_1 (&dperm)
41646 || expand_vec_perm_broadcast_1 (&dperm));
41647 gcc_assert (ok);
41648 return ok;
41649 }
41650 goto widen;
41651
41652 case E_V16QImode:
41653 if (TARGET_AVX2)
41654 return ix86_vector_duplicate_value (mode, target, val);
41655
41656 if (TARGET_SSE2)
41657 goto permute;
41658 goto widen;
41659
41660 widen:
41661 /* Replicate the value once into the next wider mode and recurse. */
41662 {
41663 machine_mode smode, wsmode, wvmode;
41664 rtx x;
41665
41666 smode = GET_MODE_INNER (mode);
41667 wvmode = get_mode_wider_vector (mode);
41668 wsmode = GET_MODE_INNER (wvmode);
41669
41670 val = convert_modes (wsmode, smode, val, true);
41671 x = expand_simple_binop (wsmode, ASHIFT, val,
41672 GEN_INT (GET_MODE_BITSIZE (smode)),
41673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
41674 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
41675
41676 x = gen_reg_rtx (wvmode);
41677 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
41678 gcc_assert (ok);
41679 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
41680 return ok;
41681 }
41682
41683 case E_V16HImode:
41684 case E_V32QImode:
41685 if (TARGET_AVX2)
41686 return ix86_vector_duplicate_value (mode, target, val);
41687 else
41688 {
41689 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
41690 rtx x = gen_reg_rtx (hvmode);
41691
41692 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
41693 gcc_assert (ok);
41694
41695 x = gen_rtx_VEC_CONCAT (mode, x, x);
41696 emit_insn (gen_rtx_SET (target, x));
41697 }
41698 return true;
41699
41700 case E_V64QImode:
41701 case E_V32HImode:
41702 if (TARGET_AVX512BW)
41703 return ix86_vector_duplicate_value (mode, target, val);
41704 else
41705 {
41706 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
41707 rtx x = gen_reg_rtx (hvmode);
41708
41709 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
41710 gcc_assert (ok);
41711
41712 x = gen_rtx_VEC_CONCAT (mode, x, x);
41713 emit_insn (gen_rtx_SET (target, x));
41714 }
41715 return true;
41716
41717 default:
41718 return false;
41719 }
41720 }
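
/* Illustrative sketch of the "widen" strategy above (hypothetical values, not
   compiler code): to splat a byte B into V8QImode without a byte-level
   broadcast, first build the HImode value

     unsigned short w = ((unsigned short) b << 8) | b;

   then recurse to splat W into V4HImode; reinterpreting that vector as
   V8QImode yields eight copies of B.  */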
41721
41722 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41723 whose ONE_VAR element is VAR, and other elements are zero. Return true
41724 if successful. */
41725
41726 static bool
41727 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
41728 rtx target, rtx var, int one_var)
41729 {
41730 machine_mode vsimode;
41731 rtx new_target;
41732 rtx x, tmp;
41733 bool use_vector_set = false;
41734
41735 switch (mode)
41736 {
41737 case E_V2DImode:
41738 /* For SSE4.1, we normally use vector set. But if the second
41739 element is zero and inter-unit moves are OK, we use movq
41740 instead. */
41741 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
41742 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
41743 && one_var == 0));
41744 break;
41745 case E_V16QImode:
41746 case E_V4SImode:
41747 case E_V4SFmode:
41748 use_vector_set = TARGET_SSE4_1;
41749 break;
41750 case E_V8HImode:
41751 use_vector_set = TARGET_SSE2;
41752 break;
41753 case E_V4HImode:
41754 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
41755 break;
41756 case E_V32QImode:
41757 case E_V16HImode:
41758 case E_V8SImode:
41759 case E_V8SFmode:
41760 case E_V4DFmode:
41761 use_vector_set = TARGET_AVX;
41762 break;
41763 case E_V4DImode:
41764 /* Use ix86_expand_vector_set in 64bit mode only. */
41765 use_vector_set = TARGET_AVX && TARGET_64BIT;
41766 break;
41767 default:
41768 break;
41769 }
41770
41771 if (use_vector_set)
41772 {
41773 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
41774 var = force_reg (GET_MODE_INNER (mode), var);
41775 ix86_expand_vector_set (mmx_ok, target, var, one_var);
41776 return true;
41777 }
41778
41779 switch (mode)
41780 {
41781 case E_V2SFmode:
41782 case E_V2SImode:
41783 if (!mmx_ok)
41784 return false;
41785 /* FALLTHRU */
41786
41787 case E_V2DFmode:
41788 case E_V2DImode:
41789 if (one_var != 0)
41790 return false;
41791 var = force_reg (GET_MODE_INNER (mode), var);
41792 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
41793 emit_insn (gen_rtx_SET (target, x));
41794 return true;
41795
41796 case E_V4SFmode:
41797 case E_V4SImode:
41798 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
41799 new_target = gen_reg_rtx (mode);
41800 else
41801 new_target = target;
41802 var = force_reg (GET_MODE_INNER (mode), var);
41803 x = gen_rtx_VEC_DUPLICATE (mode, var);
41804 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
41805 emit_insn (gen_rtx_SET (new_target, x));
41806 if (one_var != 0)
41807 {
41808 /* We need to shuffle the value to the correct position, so
41809 create a new pseudo to store the intermediate result. */
41810
41811 /* With SSE2, we can use the integer shuffle insns. */
41812 if (mode != V4SFmode && TARGET_SSE2)
41813 {
41814 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
41815 const1_rtx,
41816 GEN_INT (one_var == 1 ? 0 : 1),
41817 GEN_INT (one_var == 2 ? 0 : 1),
41818 GEN_INT (one_var == 3 ? 0 : 1)));
41819 if (target != new_target)
41820 emit_move_insn (target, new_target);
41821 return true;
41822 }
41823
41824 /* Otherwise convert the intermediate result to V4SFmode and
41825 use the SSE1 shuffle instructions. */
41826 if (mode != V4SFmode)
41827 {
41828 tmp = gen_reg_rtx (V4SFmode);
41829 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
41830 }
41831 else
41832 tmp = new_target;
41833
41834 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
41835 const1_rtx,
41836 GEN_INT (one_var == 1 ? 0 : 1),
41837 GEN_INT (one_var == 2 ? 0+4 : 1+4),
41838 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
41839
41840 if (mode != V4SFmode)
41841 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
41842 else if (tmp != target)
41843 emit_move_insn (target, tmp);
41844 }
41845 else if (target != new_target)
41846 emit_move_insn (target, new_target);
41847 return true;
41848
41849 case E_V8HImode:
41850 case E_V16QImode:
41851 vsimode = V4SImode;
41852 goto widen;
41853 case E_V4HImode:
41854 case E_V8QImode:
41855 if (!mmx_ok)
41856 return false;
41857 vsimode = V2SImode;
41858 goto widen;
41859 widen:
41860 if (one_var != 0)
41861 return false;
41862
41863 /* Zero extend the variable element to SImode and recurse. */
41864 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
41865
41866 x = gen_reg_rtx (vsimode);
41867 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
41868 var, one_var))
41869 gcc_unreachable ();
41870
41871 emit_move_insn (target, gen_lowpart (mode, x));
41872 return true;
41873
41874 default:
41875 return false;
41876 }
41877 }
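
/* Illustrative sketch (hypothetical values): with SSE2 but not SSE4.1, the
   V4SImode vector { 0, X, 0, 0 } is built by first materializing
   { X, 0, 0, 0 } with the VEC_MERGE above (roughly a movd-style insert into
   element 0) and then using pshufd with selectors { 1, 0, 1, 1 } to move X
   into element 1 while pulling zeros into the remaining lanes.  */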
41878
41879 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41880 consisting of the values in VALS. It is known that all elements
41881 except ONE_VAR are constants. Return true if successful. */
41882
41883 static bool
41884 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
41885 rtx target, rtx vals, int one_var)
41886 {
41887 rtx var = XVECEXP (vals, 0, one_var);
41888 machine_mode wmode;
41889 rtx const_vec, x;
41890
41891 const_vec = copy_rtx (vals);
41892 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
41893 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
41894
41895 switch (mode)
41896 {
41897 case E_V2DFmode:
41898 case E_V2DImode:
41899 case E_V2SFmode:
41900 case E_V2SImode:
41901 /* For the two element vectors, it's just as easy to use
41902 the general case. */
41903 return false;
41904
41905 case E_V4DImode:
41906 /* Use ix86_expand_vector_set in 64bit mode only. */
41907 if (!TARGET_64BIT)
41908 return false;
41909 /* FALLTHRU */
41910 case E_V4DFmode:
41911 case E_V8SFmode:
41912 case E_V8SImode:
41913 case E_V16HImode:
41914 case E_V32QImode:
41915 case E_V4SFmode:
41916 case E_V4SImode:
41917 case E_V8HImode:
41918 case E_V4HImode:
41919 break;
41920
41921 case E_V16QImode:
41922 if (TARGET_SSE4_1)
41923 break;
41924 wmode = V8HImode;
41925 goto widen;
41926 case E_V8QImode:
41927 wmode = V4HImode;
41928 goto widen;
41929 widen:
41930 /* There's no way to set one QImode entry easily. Combine
41931 the variable value with its adjacent constant value, and
41932 promote to an HImode set. */
41933 x = XVECEXP (vals, 0, one_var ^ 1);
41934 if (one_var & 1)
41935 {
41936 var = convert_modes (HImode, QImode, var, true);
41937 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
41938 NULL_RTX, 1, OPTAB_LIB_WIDEN);
41939 x = GEN_INT (INTVAL (x) & 0xff);
41940 }
41941 else
41942 {
41943 var = convert_modes (HImode, QImode, var, true);
41944 x = gen_int_mode (INTVAL (x) << 8, HImode);
41945 }
41946 if (x != const0_rtx)
41947 var = expand_simple_binop (HImode, IOR, var, x, var,
41948 1, OPTAB_LIB_WIDEN);
41949
41950 x = gen_reg_rtx (wmode);
41951 emit_move_insn (x, gen_lowpart (wmode, const_vec));
41952 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
41953
41954 emit_move_insn (target, gen_lowpart (mode, x));
41955 return true;
41956
41957 default:
41958 return false;
41959 }
41960
41961 emit_move_insn (target, const_vec);
41962 ix86_expand_vector_set (mmx_ok, target, var, one_var);
41963 return true;
41964 }
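
/* Illustrative sketch of the QImode widening above (hypothetical values):
   for V16QImode without SSE4.1, with the single variable byte V at index 5
   and the constant 0x12 at the adjacent index 4, the combined HImode value is

     unsigned short w = ((unsigned short) v << 8) | 0x12;

   which is then inserted as element 5 >> 1 == 2 of the V8HImode view of the
   constant vector before casting back to V16QImode.  */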
41965
41966 /* A subroutine of ix86_expand_vector_init_general. Use vector
41967 concatenate to handle the most general case: all values variable,
41968 and none identical. */
41969
41970 static void
41971 ix86_expand_vector_init_concat (machine_mode mode,
41972 rtx target, rtx *ops, int n)
41973 {
41974 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
41975 rtx first[16], second[8], third[4];
41976 rtvec v;
41977 int i, j;
41978
41979 switch (n)
41980 {
41981 case 2:
41982 switch (mode)
41983 {
41984 case E_V16SImode:
41985 cmode = V8SImode;
41986 break;
41987 case E_V16SFmode:
41988 cmode = V8SFmode;
41989 break;
41990 case E_V8DImode:
41991 cmode = V4DImode;
41992 break;
41993 case E_V8DFmode:
41994 cmode = V4DFmode;
41995 break;
41996 case E_V8SImode:
41997 cmode = V4SImode;
41998 break;
41999 case E_V8SFmode:
42000 cmode = V4SFmode;
42001 break;
42002 case E_V4DImode:
42003 cmode = V2DImode;
42004 break;
42005 case E_V4DFmode:
42006 cmode = V2DFmode;
42007 break;
42008 case E_V4SImode:
42009 cmode = V2SImode;
42010 break;
42011 case E_V4SFmode:
42012 cmode = V2SFmode;
42013 break;
42014 case E_V2DImode:
42015 cmode = DImode;
42016 break;
42017 case E_V2SImode:
42018 cmode = SImode;
42019 break;
42020 case E_V2DFmode:
42021 cmode = DFmode;
42022 break;
42023 case E_V2SFmode:
42024 cmode = SFmode;
42025 break;
42026 default:
42027 gcc_unreachable ();
42028 }
42029
42030 if (!register_operand (ops[1], cmode))
42031 ops[1] = force_reg (cmode, ops[1]);
42032 if (!register_operand (ops[0], cmode))
42033 ops[0] = force_reg (cmode, ops[0]);
42034 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
42035 ops[1])));
42036 break;
42037
42038 case 4:
42039 switch (mode)
42040 {
42041 case E_V4DImode:
42042 cmode = V2DImode;
42043 break;
42044 case E_V4DFmode:
42045 cmode = V2DFmode;
42046 break;
42047 case E_V4SImode:
42048 cmode = V2SImode;
42049 break;
42050 case E_V4SFmode:
42051 cmode = V2SFmode;
42052 break;
42053 default:
42054 gcc_unreachable ();
42055 }
42056 goto half;
42057
42058 case 8:
42059 switch (mode)
42060 {
42061 case E_V8DImode:
42062 cmode = V2DImode;
42063 hmode = V4DImode;
42064 break;
42065 case E_V8DFmode:
42066 cmode = V2DFmode;
42067 hmode = V4DFmode;
42068 break;
42069 case E_V8SImode:
42070 cmode = V2SImode;
42071 hmode = V4SImode;
42072 break;
42073 case E_V8SFmode:
42074 cmode = V2SFmode;
42075 hmode = V4SFmode;
42076 break;
42077 default:
42078 gcc_unreachable ();
42079 }
42080 goto half;
42081
42082 case 16:
42083 switch (mode)
42084 {
42085 case E_V16SImode:
42086 cmode = V2SImode;
42087 hmode = V4SImode;
42088 gmode = V8SImode;
42089 break;
42090 case E_V16SFmode:
42091 cmode = V2SFmode;
42092 hmode = V4SFmode;
42093 gmode = V8SFmode;
42094 break;
42095 default:
42096 gcc_unreachable ();
42097 }
42098 goto half;
42099
42100 half:
42101 /* FIXME: We process inputs backward to help RA. PR 36222. */
42102 i = n - 1;
42103 j = (n >> 1) - 1;
42104 for (; i > 0; i -= 2, j--)
42105 {
42106 first[j] = gen_reg_rtx (cmode);
42107 v = gen_rtvec (2, ops[i - 1], ops[i]);
42108 ix86_expand_vector_init (false, first[j],
42109 gen_rtx_PARALLEL (cmode, v));
42110 }
42111
42112 n >>= 1;
42113 if (n > 4)
42114 {
42115 gcc_assert (hmode != VOIDmode);
42116 gcc_assert (gmode != VOIDmode);
42117 for (i = j = 0; i < n; i += 2, j++)
42118 {
42119 second[j] = gen_reg_rtx (hmode);
42120 ix86_expand_vector_init_concat (hmode, second [j],
42121 &first [i], 2);
42122 }
42123 n >>= 1;
42124 for (i = j = 0; i < n; i += 2, j++)
42125 {
42126 third[j] = gen_reg_rtx (gmode);
42127 ix86_expand_vector_init_concat (gmode, third[j],
42128 &second[i], 2);
42129 }
42130 n >>= 1;
42131 ix86_expand_vector_init_concat (mode, target, third, n);
42132 }
42133 else if (n > 2)
42134 {
42135 gcc_assert (hmode != VOIDmode);
42136 for (i = j = 0; i < n; i += 2, j++)
42137 {
42138 second[j] = gen_reg_rtx (hmode);
42139 ix86_expand_vector_init_concat (hmode, second [j],
42140 &first [i], 2);
42141 }
42142 n >>= 1;
42143 ix86_expand_vector_init_concat (mode, target, second, n);
42144 }
42145 else
42146 ix86_expand_vector_init_concat (mode, target, first, n);
42147 break;
42148
42149 default:
42150 gcc_unreachable ();
42151 }
42152 }
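
/* Illustrative sketch: for V8SFmode built from eight scalar operands the
   recursion above proceeds roughly as

     8 x SFmode  ->  4 x V2SFmode  ->  2 x V4SFmode  ->  1 x V8SFmode,

   concatenating adjacent pairs with VEC_CONCAT at each step.  */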
42153
42154 /* A subroutine of ix86_expand_vector_init_general. Use vector
42155 interleave to handle the most general case: all values variable,
42156 and none identical. */
42157
42158 static void
42159 ix86_expand_vector_init_interleave (machine_mode mode,
42160 rtx target, rtx *ops, int n)
42161 {
42162 machine_mode first_imode, second_imode, third_imode, inner_mode;
42163 int i, j;
42164 rtx op0, op1;
42165 rtx (*gen_load_even) (rtx, rtx, rtx);
42166 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
42167 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
42168
42169 switch (mode)
42170 {
42171 case E_V8HImode:
42172 gen_load_even = gen_vec_setv8hi;
42173 gen_interleave_first_low = gen_vec_interleave_lowv4si;
42174 gen_interleave_second_low = gen_vec_interleave_lowv2di;
42175 inner_mode = HImode;
42176 first_imode = V4SImode;
42177 second_imode = V2DImode;
42178 third_imode = VOIDmode;
42179 break;
42180 case E_V16QImode:
42181 gen_load_even = gen_vec_setv16qi;
42182 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
42183 gen_interleave_second_low = gen_vec_interleave_lowv4si;
42184 inner_mode = QImode;
42185 first_imode = V8HImode;
42186 second_imode = V4SImode;
42187 third_imode = V2DImode;
42188 break;
42189 default:
42190 gcc_unreachable ();
42191 }
42192
42193 for (i = 0; i < n; i++)
42194 {
42195 /* Extend the odd element to SImode using a paradoxical SUBREG. */
42196 op0 = gen_reg_rtx (SImode);
42197 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
42198
42199 /* Insert the SImode value as the low element of a V4SImode vector. */
42200 op1 = gen_reg_rtx (V4SImode);
42201 op0 = gen_rtx_VEC_MERGE (V4SImode,
42202 gen_rtx_VEC_DUPLICATE (V4SImode,
42203 op0),
42204 CONST0_RTX (V4SImode),
42205 const1_rtx);
42206 emit_insn (gen_rtx_SET (op1, op0));
42207
42208 /* Cast the V4SImode vector back to a vector in the original mode. */
42209 op0 = gen_reg_rtx (mode);
42210 emit_move_insn (op0, gen_lowpart (mode, op1));
42211
42212 /* Load even elements into the second position. */
42213 emit_insn (gen_load_even (op0,
42214 force_reg (inner_mode,
42215 ops [i + i + 1]),
42216 const1_rtx));
42217
42218 /* Cast vector to FIRST_IMODE vector. */
42219 ops[i] = gen_reg_rtx (first_imode);
42220 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
42221 }
42222
42223 /* Interleave low FIRST_IMODE vectors. */
42224 for (i = j = 0; i < n; i += 2, j++)
42225 {
42226 op0 = gen_reg_rtx (first_imode);
42227 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
42228
42229 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
42230 ops[j] = gen_reg_rtx (second_imode);
42231 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
42232 }
42233
42234 /* Interleave low SECOND_IMODE vectors. */
42235 switch (second_imode)
42236 {
42237 case E_V4SImode:
42238 for (i = j = 0; i < n / 2; i += 2, j++)
42239 {
42240 op0 = gen_reg_rtx (second_imode);
42241 emit_insn (gen_interleave_second_low (op0, ops[i],
42242 ops[i + 1]));
42243
42244 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
42245 vector. */
42246 ops[j] = gen_reg_rtx (third_imode);
42247 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
42248 }
42249 second_imode = V2DImode;
42250 gen_interleave_second_low = gen_vec_interleave_lowv2di;
42251 /* FALLTHRU */
42252
42253 case E_V2DImode:
42254 op0 = gen_reg_rtx (second_imode);
42255 emit_insn (gen_interleave_second_low (op0, ops[0],
42256 ops[1]));
42257
42258 /* Cast the SECOND_IMODE vector back to a vector in the original
42259 mode. */
42260 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
42261 break;
42262
42263 default:
42264 gcc_unreachable ();
42265 }
42266 }
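
/* Illustrative sketch for V8HImode (hypothetical element names): each input
   pair { a0, a1 } is first packed into the low 32 bits of a vector register,
   giving four V4SImode values of the form { a0:a1, x, x, x }.  These are then
   merged pairwise with the low-V4SImode interleave (punpckldq) and finally
   with the low-V2DImode interleave (punpcklqdq), so all eight halfwords end
   up in order in one register.  */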
42267
42268 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
42269 all values variable, and none identical. */
42270
42271 static void
42272 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
42273 rtx target, rtx vals)
42274 {
42275 rtx ops[64], op0, op1, op2, op3, op4, op5;
42276 machine_mode half_mode = VOIDmode;
42277 machine_mode quarter_mode = VOIDmode;
42278 int n, i;
42279
42280 switch (mode)
42281 {
42282 case E_V2SFmode:
42283 case E_V2SImode:
42284 if (!mmx_ok && !TARGET_SSE)
42285 break;
42286 /* FALLTHRU */
42287
42288 case E_V16SImode:
42289 case E_V16SFmode:
42290 case E_V8DFmode:
42291 case E_V8DImode:
42292 case E_V8SFmode:
42293 case E_V8SImode:
42294 case E_V4DFmode:
42295 case E_V4DImode:
42296 case E_V4SFmode:
42297 case E_V4SImode:
42298 case E_V2DFmode:
42299 case E_V2DImode:
42300 n = GET_MODE_NUNITS (mode);
42301 for (i = 0; i < n; i++)
42302 ops[i] = XVECEXP (vals, 0, i);
42303 ix86_expand_vector_init_concat (mode, target, ops, n);
42304 return;
42305
42306 case E_V2TImode:
42307 for (i = 0; i < 2; i++)
42308 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
42309 op0 = gen_reg_rtx (V4DImode);
42310 ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
42311 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
42312 return;
42313
42314 case E_V4TImode:
42315 for (i = 0; i < 4; i++)
42316 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
42317 ops[4] = gen_reg_rtx (V4DImode);
42318 ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
42319 ops[5] = gen_reg_rtx (V4DImode);
42320 ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
42321 op0 = gen_reg_rtx (V8DImode);
42322 ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
42323 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
42324 return;
42325
42326 case E_V32QImode:
42327 half_mode = V16QImode;
42328 goto half;
42329
42330 case E_V16HImode:
42331 half_mode = V8HImode;
42332 goto half;
42333
42334 half:
42335 n = GET_MODE_NUNITS (mode);
42336 for (i = 0; i < n; i++)
42337 ops[i] = XVECEXP (vals, 0, i);
42338 op0 = gen_reg_rtx (half_mode);
42339 op1 = gen_reg_rtx (half_mode);
42340 ix86_expand_vector_init_interleave (half_mode, op0, ops,
42341 n >> 2);
42342 ix86_expand_vector_init_interleave (half_mode, op1,
42343 &ops [n >> 1], n >> 2);
42344 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
42345 return;
42346
42347 case E_V64QImode:
42348 quarter_mode = V16QImode;
42349 half_mode = V32QImode;
42350 goto quarter;
42351
42352 case E_V32HImode:
42353 quarter_mode = V8HImode;
42354 half_mode = V16HImode;
42355 goto quarter;
42356
42357 quarter:
42358 n = GET_MODE_NUNITS (mode);
42359 for (i = 0; i < n; i++)
42360 ops[i] = XVECEXP (vals, 0, i);
42361 op0 = gen_reg_rtx (quarter_mode);
42362 op1 = gen_reg_rtx (quarter_mode);
42363 op2 = gen_reg_rtx (quarter_mode);
42364 op3 = gen_reg_rtx (quarter_mode);
42365 op4 = gen_reg_rtx (half_mode);
42366 op5 = gen_reg_rtx (half_mode);
42367 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
42368 n >> 3);
42369 ix86_expand_vector_init_interleave (quarter_mode, op1,
42370 &ops [n >> 2], n >> 3);
42371 ix86_expand_vector_init_interleave (quarter_mode, op2,
42372 &ops [n >> 1], n >> 3);
42373 ix86_expand_vector_init_interleave (quarter_mode, op3,
42374 &ops [(n >> 1) | (n >> 2)], n >> 3);
42375 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
42376 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
42377 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
42378 return;
42379
42380 case E_V16QImode:
42381 if (!TARGET_SSE4_1)
42382 break;
42383 /* FALLTHRU */
42384
42385 case E_V8HImode:
42386 if (!TARGET_SSE2)
42387 break;
42388
42389 /* Don't use ix86_expand_vector_init_interleave if we can't
42390 move from GPR to SSE register directly. */
42391 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
42392 break;
42393
42394 n = GET_MODE_NUNITS (mode);
42395 for (i = 0; i < n; i++)
42396 ops[i] = XVECEXP (vals, 0, i);
42397 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
42398 return;
42399
42400 case E_V4HImode:
42401 case E_V8QImode:
42402 break;
42403
42404 default:
42405 gcc_unreachable ();
42406 }
42407
42408 {
42409 int i, j, n_elts, n_words, n_elt_per_word;
42410 machine_mode inner_mode;
42411 rtx words[4], shift;
42412
42413 inner_mode = GET_MODE_INNER (mode);
42414 n_elts = GET_MODE_NUNITS (mode);
42415 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
42416 n_elt_per_word = n_elts / n_words;
42417 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
42418
42419 for (i = 0; i < n_words; ++i)
42420 {
42421 rtx word = NULL_RTX;
42422
42423 for (j = 0; j < n_elt_per_word; ++j)
42424 {
42425 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
42426 elt = convert_modes (word_mode, inner_mode, elt, true);
42427
42428 if (j == 0)
42429 word = elt;
42430 else
42431 {
42432 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
42433 word, 1, OPTAB_LIB_WIDEN);
42434 word = expand_simple_binop (word_mode, IOR, word, elt,
42435 word, 1, OPTAB_LIB_WIDEN);
42436 }
42437 }
42438
42439 words[i] = word;
42440 }
42441
42442 if (n_words == 1)
42443 emit_move_insn (target, gen_lowpart (mode, words[0]));
42444 else if (n_words == 2)
42445 {
42446 rtx tmp = gen_reg_rtx (mode);
42447 emit_clobber (tmp);
42448 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
42449 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
42450 emit_move_insn (target, tmp);
42451 }
42452 else if (n_words == 4)
42453 {
42454 rtx tmp = gen_reg_rtx (V4SImode);
42455 gcc_assert (word_mode == SImode);
42456 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
42457 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
42458 emit_move_insn (target, gen_lowpart (mode, tmp));
42459 }
42460 else
42461 gcc_unreachable ();
42462 }
42463 }
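
/* Illustrative sketch of the word-building fallback above (hypothetical
   values): initializing V4HImode { a, b, c, d } on a 32-bit target packs each
   pair of elements into a word-mode integer,

     unsigned int w0 = ((unsigned int) b << 16) | a;
     unsigned int w1 = ((unsigned int) d << 16) | c;

   and then assembles the vector from W0 and W1 through its word-sized
   subparts.  */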
42464
42465 /* Initialize vector TARGET via VALS. Suppress the use of MMX
42466 instructions unless MMX_OK is true. */
42467
42468 void
42469 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
42470 {
42471 machine_mode mode = GET_MODE (target);
42472 machine_mode inner_mode = GET_MODE_INNER (mode);
42473 int n_elts = GET_MODE_NUNITS (mode);
42474 int n_var = 0, one_var = -1;
42475 bool all_same = true, all_const_zero = true;
42476 int i;
42477 rtx x;
42478
42479 /* First handle initialization from vector elements (the elements of VALS may themselves be half-width vectors rather than scalars). */
42480 if (n_elts != XVECLEN (vals, 0))
42481 {
42482 rtx subtarget = target;
42483 x = XVECEXP (vals, 0, 0);
42484 gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
42485 if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
42486 {
42487 rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
42488 if (inner_mode == QImode || inner_mode == HImode)
42489 {
42490 unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
42491 mode = mode_for_vector (SImode, n_bits / 4).require ();
42492 inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
42493 ops[0] = gen_lowpart (inner_mode, ops[0]);
42494 ops[1] = gen_lowpart (inner_mode, ops[1]);
42495 subtarget = gen_reg_rtx (mode);
42496 }
42497 ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
42498 if (subtarget != target)
42499 emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
42500 return;
42501 }
42502 gcc_unreachable ();
42503 }
42504
42505 for (i = 0; i < n_elts; ++i)
42506 {
42507 x = XVECEXP (vals, 0, i);
42508 if (!(CONST_SCALAR_INT_P (x)
42509 || CONST_DOUBLE_P (x)
42510 || CONST_FIXED_P (x)))
42511 n_var++, one_var = i;
42512 else if (x != CONST0_RTX (inner_mode))
42513 all_const_zero = false;
42514 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
42515 all_same = false;
42516 }
42517
42518 /* Constants are best loaded from the constant pool. */
42519 if (n_var == 0)
42520 {
42521 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
42522 return;
42523 }
42524
42525 /* If all values are identical, broadcast the value. */
42526 if (all_same
42527 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
42528 XVECEXP (vals, 0, 0)))
42529 return;
42530
42531 /* Values where only one field is non-constant are best loaded from
42532 the pool and overwritten via move later. */
42533 if (n_var == 1)
42534 {
42535 if (all_const_zero
42536 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
42537 XVECEXP (vals, 0, one_var),
42538 one_var))
42539 return;
42540
42541 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
42542 return;
42543 }
42544
42545 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
42546 }
42547
42548 void
42549 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
42550 {
42551 machine_mode mode = GET_MODE (target);
42552 machine_mode inner_mode = GET_MODE_INNER (mode);
42553 machine_mode half_mode;
42554 bool use_vec_merge = false;
42555 rtx tmp;
42556 static rtx (*gen_extract[6][2]) (rtx, rtx)
42557 = {
42558 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
42559 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
42560 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
42561 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
42562 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
42563 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
42564 };
42565 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
42566 = {
42567 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
42568 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
42569 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
42570 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
42571 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
42572 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
42573 };
42574 int i, j, n;
42575 machine_mode mmode = VOIDmode;
42576 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
42577
42578 switch (mode)
42579 {
42580 case E_V2SFmode:
42581 case E_V2SImode:
42582 if (mmx_ok)
42583 {
42584 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
42585 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
42586 if (elt == 0)
42587 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
42588 else
42589 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
42590 emit_insn (gen_rtx_SET (target, tmp));
42591 return;
42592 }
42593 break;
42594
42595 case E_V2DImode:
42596 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
42597 if (use_vec_merge)
42598 break;
42599
42600 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
42601 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
42602 if (elt == 0)
42603 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
42604 else
42605 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
42606 emit_insn (gen_rtx_SET (target, tmp));
42607 return;
42608
42609 case E_V2DFmode:
42610 {
42611 rtx op0, op1;
42612
42613 /* For the two element vectors, we implement a VEC_CONCAT with
42614 the extraction of the other element. */
42615
42616 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
42617 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
42618
42619 if (elt == 0)
42620 op0 = val, op1 = tmp;
42621 else
42622 op0 = tmp, op1 = val;
42623
42624 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
42625 emit_insn (gen_rtx_SET (target, tmp));
42626 }
42627 return;
42628
42629 case E_V4SFmode:
42630 use_vec_merge = TARGET_SSE4_1;
42631 if (use_vec_merge)
42632 break;
42633
42634 switch (elt)
42635 {
42636 case 0:
42637 use_vec_merge = true;
42638 break;
42639
42640 case 1:
42641 /* tmp = target = A B C D */
42642 tmp = copy_to_reg (target);
42643 /* target = A A B B */
42644 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
42645 /* target = X A B B */
42646 ix86_expand_vector_set (false, target, val, 0);
42647 /* target = A X C D */
42648 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42649 const1_rtx, const0_rtx,
42650 GEN_INT (2+4), GEN_INT (3+4)));
42651 return;
42652
42653 case 2:
42654 /* tmp = target = A B C D */
42655 tmp = copy_to_reg (target);
42656 /* tmp = X B C D */
42657 ix86_expand_vector_set (false, tmp, val, 0);
42658 /* target = A B X D */
42659 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42660 const0_rtx, const1_rtx,
42661 GEN_INT (0+4), GEN_INT (3+4)));
42662 return;
42663
42664 case 3:
42665 /* tmp = target = A B C D */
42666 tmp = copy_to_reg (target);
42667 /* tmp = X B C D */
42668 ix86_expand_vector_set (false, tmp, val, 0);
42669 /* target = A B C X */
42670 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42671 const0_rtx, const1_rtx,
42672 GEN_INT (2+4), GEN_INT (0+4)));
42673 return;
42674
42675 default:
42676 gcc_unreachable ();
42677 }
42678 break;
42679
42680 case E_V4SImode:
42681 use_vec_merge = TARGET_SSE4_1;
42682 if (use_vec_merge)
42683 break;
42684
42685 /* Element 0 handled by vec_merge below. */
42686 if (elt == 0)
42687 {
42688 use_vec_merge = true;
42689 break;
42690 }
42691
42692 if (TARGET_SSE2)
42693 {
42694 /* With SSE2, use integer shuffles to swap element 0 and ELT,
42695 store into element 0, then shuffle them back. */
42696
42697 rtx order[4];
42698
42699 order[0] = GEN_INT (elt);
42700 order[1] = const1_rtx;
42701 order[2] = const2_rtx;
42702 order[3] = GEN_INT (3);
42703 order[elt] = const0_rtx;
42704
42705 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
42706 order[1], order[2], order[3]));
42707
42708 ix86_expand_vector_set (false, target, val, 0);
42709
42710 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
42711 order[1], order[2], order[3]));
42712 }
42713 else
42714 {
42715 /* For SSE1, we have to reuse the V4SF code. */
42716 rtx t = gen_reg_rtx (V4SFmode);
42717 emit_move_insn (t, gen_lowpart (V4SFmode, target));
42718 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
42719 emit_move_insn (target, gen_lowpart (mode, t));
42720 }
42721 return;
42722
42723 case E_V8HImode:
42724 use_vec_merge = TARGET_SSE2;
42725 break;
42726 case E_V4HImode:
42727 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
42728 break;
42729
42730 case E_V16QImode:
42731 use_vec_merge = TARGET_SSE4_1;
42732 break;
42733
42734 case E_V8QImode:
42735 break;
42736
42737 case E_V32QImode:
42738 half_mode = V16QImode;
42739 j = 0;
42740 n = 16;
42741 goto half;
42742
42743 case E_V16HImode:
42744 half_mode = V8HImode;
42745 j = 1;
42746 n = 8;
42747 goto half;
42748
42749 case E_V8SImode:
42750 half_mode = V4SImode;
42751 j = 2;
42752 n = 4;
42753 goto half;
42754
42755 case E_V4DImode:
42756 half_mode = V2DImode;
42757 j = 3;
42758 n = 2;
42759 goto half;
42760
42761 case E_V8SFmode:
42762 half_mode = V4SFmode;
42763 j = 4;
42764 n = 4;
42765 goto half;
42766
42767 case E_V4DFmode:
42768 half_mode = V2DFmode;
42769 j = 5;
42770 n = 2;
42771 goto half;
42772
42773 half:
42774 /* Compute offset. */
42775 i = elt / n;
42776 elt %= n;
42777
42778 gcc_assert (i <= 1);
42779
42780 /* Extract the half. */
42781 tmp = gen_reg_rtx (half_mode);
42782 emit_insn (gen_extract[j][i] (tmp, target));
42783
42784 /* Put val in tmp at elt. */
42785 ix86_expand_vector_set (false, tmp, val, elt);
42786
42787 /* Put it back. */
42788 emit_insn (gen_insert[j][i] (target, target, tmp));
42789 return;
42790
42791 case E_V8DFmode:
42792 if (TARGET_AVX512F)
42793 {
42794 mmode = QImode;
42795 gen_blendm = gen_avx512f_blendmv8df;
42796 }
42797 break;
42798
42799 case E_V8DImode:
42800 if (TARGET_AVX512F)
42801 {
42802 mmode = QImode;
42803 gen_blendm = gen_avx512f_blendmv8di;
42804 }
42805 break;
42806
42807 case E_V16SFmode:
42808 if (TARGET_AVX512F)
42809 {
42810 mmode = HImode;
42811 gen_blendm = gen_avx512f_blendmv16sf;
42812 }
42813 break;
42814
42815 case E_V16SImode:
42816 if (TARGET_AVX512F)
42817 {
42818 mmode = HImode;
42819 gen_blendm = gen_avx512f_blendmv16si;
42820 }
42821 break;
42822
42823 case E_V32HImode:
42824 if (TARGET_AVX512F && TARGET_AVX512BW)
42825 {
42826 mmode = SImode;
42827 gen_blendm = gen_avx512bw_blendmv32hi;
42828 }
42829 break;
42830
42831 case E_V64QImode:
42832 if (TARGET_AVX512F && TARGET_AVX512BW)
42833 {
42834 mmode = DImode;
42835 gen_blendm = gen_avx512bw_blendmv64qi;
42836 }
42837 break;
42838
42839 default:
42840 break;
42841 }
42842
42843 if (mmode != VOIDmode)
42844 {
42845 tmp = gen_reg_rtx (mode);
42846 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
42847 /* The avx512*_blendm<mode> expanders have a different operand order
42848 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
42849 elements where the mask is set and the second input operand otherwise.
42850 In {sse,avx}*_*blend* the first input operand is used for elements
42851 where the mask is clear and the second input operand otherwise. */
42852 emit_insn (gen_blendm (target, target, tmp,
42853 force_reg (mmode,
42854 gen_int_mode (1 << elt, mmode))));
42855 }
42856 else if (use_vec_merge)
42857 {
42858 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
42859 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
42860 emit_insn (gen_rtx_SET (target, tmp));
42861 }
42862 else
42863 {
42864 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
42865
42866 emit_move_insn (mem, target);
42867
42868 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
42869 emit_move_insn (tmp, val);
42870
42871 emit_move_insn (target, mem);
42872 }
42873 }
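
/* Illustrative sketch of the final fallback above: when no suitable insert
   instruction is available, the element is spilled through a stack temporary,
   roughly as if by (vec_t, elem_t and N are hypothetical placeholders)

     union { vec_t v; elem_t e[N]; } u;
     u.v = target_value;
     u.e[elt] = val;
     target_value = u.v;
*/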
42874
42875 void
42876 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
42877 {
42878 machine_mode mode = GET_MODE (vec);
42879 machine_mode inner_mode = GET_MODE_INNER (mode);
42880 bool use_vec_extr = false;
42881 rtx tmp;
42882
42883 switch (mode)
42884 {
42885 case E_V2SImode:
42886 case E_V2SFmode:
42887 if (!mmx_ok)
42888 break;
42889 /* FALLTHRU */
42890
42891 case E_V2DFmode:
42892 case E_V2DImode:
42893 case E_V2TImode:
42894 case E_V4TImode:
42895 use_vec_extr = true;
42896 break;
42897
42898 case E_V4SFmode:
42899 use_vec_extr = TARGET_SSE4_1;
42900 if (use_vec_extr)
42901 break;
42902
42903 switch (elt)
42904 {
42905 case 0:
42906 tmp = vec;
42907 break;
42908
42909 case 1:
42910 case 3:
42911 tmp = gen_reg_rtx (mode);
42912 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
42913 GEN_INT (elt), GEN_INT (elt),
42914 GEN_INT (elt+4), GEN_INT (elt+4)));
42915 break;
42916
42917 case 2:
42918 tmp = gen_reg_rtx (mode);
42919 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
42920 break;
42921
42922 default:
42923 gcc_unreachable ();
42924 }
42925 vec = tmp;
42926 use_vec_extr = true;
42927 elt = 0;
42928 break;
42929
42930 case E_V4SImode:
42931 use_vec_extr = TARGET_SSE4_1;
42932 if (use_vec_extr)
42933 break;
42934
42935 if (TARGET_SSE2)
42936 {
42937 switch (elt)
42938 {
42939 case 0:
42940 tmp = vec;
42941 break;
42942
42943 case 1:
42944 case 3:
42945 tmp = gen_reg_rtx (mode);
42946 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
42947 GEN_INT (elt), GEN_INT (elt),
42948 GEN_INT (elt), GEN_INT (elt)));
42949 break;
42950
42951 case 2:
42952 tmp = gen_reg_rtx (mode);
42953 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
42954 break;
42955
42956 default:
42957 gcc_unreachable ();
42958 }
42959 vec = tmp;
42960 use_vec_extr = true;
42961 elt = 0;
42962 }
42963 else
42964 {
42965 /* For SSE1, we have to reuse the V4SF code. */
42966 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
42967 gen_lowpart (V4SFmode, vec), elt);
42968 return;
42969 }
42970 break;
42971
42972 case E_V8HImode:
42973 use_vec_extr = TARGET_SSE2;
42974 break;
42975 case E_V4HImode:
42976 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
42977 break;
42978
42979 case E_V16QImode:
42980 use_vec_extr = TARGET_SSE4_1;
42981 break;
42982
42983 case E_V8SFmode:
42984 if (TARGET_AVX)
42985 {
42986 tmp = gen_reg_rtx (V4SFmode);
42987 if (elt < 4)
42988 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
42989 else
42990 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
42991 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42992 return;
42993 }
42994 break;
42995
42996 case E_V4DFmode:
42997 if (TARGET_AVX)
42998 {
42999 tmp = gen_reg_rtx (V2DFmode);
43000 if (elt < 2)
43001 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
43002 else
43003 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
43004 ix86_expand_vector_extract (false, target, tmp, elt & 1);
43005 return;
43006 }
43007 break;
43008
43009 case E_V32QImode:
43010 if (TARGET_AVX)
43011 {
43012 tmp = gen_reg_rtx (V16QImode);
43013 if (elt < 16)
43014 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
43015 else
43016 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
43017 ix86_expand_vector_extract (false, target, tmp, elt & 15);
43018 return;
43019 }
43020 break;
43021
43022 case E_V16HImode:
43023 if (TARGET_AVX)
43024 {
43025 tmp = gen_reg_rtx (V8HImode);
43026 if (elt < 8)
43027 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
43028 else
43029 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
43030 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43031 return;
43032 }
43033 break;
43034
43035 case E_V8SImode:
43036 if (TARGET_AVX)
43037 {
43038 tmp = gen_reg_rtx (V4SImode);
43039 if (elt < 4)
43040 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
43041 else
43042 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
43043 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43044 return;
43045 }
43046 break;
43047
43048 case E_V4DImode:
43049 if (TARGET_AVX)
43050 {
43051 tmp = gen_reg_rtx (V2DImode);
43052 if (elt < 2)
43053 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
43054 else
43055 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
43056 ix86_expand_vector_extract (false, target, tmp, elt & 1);
43057 return;
43058 }
43059 break;
43060
43061 case E_V32HImode:
43062 if (TARGET_AVX512BW)
43063 {
43064 tmp = gen_reg_rtx (V16HImode);
43065 if (elt < 16)
43066 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
43067 else
43068 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
43069 ix86_expand_vector_extract (false, target, tmp, elt & 15);
43070 return;
43071 }
43072 break;
43073
43074 case E_V64QImode:
43075 if (TARGET_AVX512BW)
43076 {
43077 tmp = gen_reg_rtx (V32QImode);
43078 if (elt < 32)
43079 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
43080 else
43081 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
43082 ix86_expand_vector_extract (false, target, tmp, elt & 31);
43083 return;
43084 }
43085 break;
43086
43087 case E_V16SFmode:
43088 tmp = gen_reg_rtx (V8SFmode);
43089 if (elt < 8)
43090 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
43091 else
43092 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
43093 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43094 return;
43095
43096 case E_V8DFmode:
43097 tmp = gen_reg_rtx (V4DFmode);
43098 if (elt < 4)
43099 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
43100 else
43101 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
43102 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43103 return;
43104
43105 case E_V16SImode:
43106 tmp = gen_reg_rtx (V8SImode);
43107 if (elt < 8)
43108 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
43109 else
43110 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
43111 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43112 return;
43113
43114 case E_V8DImode:
43115 tmp = gen_reg_rtx (V4DImode);
43116 if (elt < 4)
43117 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
43118 else
43119 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
43120 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43121 return;
43122
43123 case E_V8QImode:
43124 /* ??? Could extract the appropriate HImode element and shift. */
43125 default:
43126 break;
43127 }
43128
43129 if (use_vec_extr)
43130 {
43131 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
43132 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
43133
43134 /* Let the rtl optimizers know about the zero extension performed. */
43135 if (inner_mode == QImode || inner_mode == HImode)
43136 {
43137 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
43138 target = gen_lowpart (SImode, target);
43139 }
43140
43141 emit_insn (gen_rtx_SET (target, tmp));
43142 }
43143 else
43144 {
43145 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
43146
43147 emit_move_insn (mem, vec);
43148
43149 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
43150 emit_move_insn (target, tmp);
43151 }
43152 }
43153
43154 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
43155 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
43156 The upper bits of DEST are undefined, though they shouldn't cause
43157 exceptions (some bits from src or all zeros are ok). */
43158
43159 static void
43160 emit_reduc_half (rtx dest, rtx src, int i)
43161 {
43162 rtx tem, d = dest;
43163 switch (GET_MODE (src))
43164 {
43165 case E_V4SFmode:
43166 if (i == 128)
43167 tem = gen_sse_movhlps (dest, src, src);
43168 else
43169 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
43170 GEN_INT (1 + 4), GEN_INT (1 + 4));
43171 break;
43172 case E_V2DFmode:
43173 tem = gen_vec_interleave_highv2df (dest, src, src);
43174 break;
43175 case E_V16QImode:
43176 case E_V8HImode:
43177 case E_V4SImode:
43178 case E_V2DImode:
43179 d = gen_reg_rtx (V1TImode);
43180 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
43181 GEN_INT (i / 2));
43182 break;
43183 case E_V8SFmode:
43184 if (i == 256)
43185 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
43186 else
43187 tem = gen_avx_shufps256 (dest, src, src,
43188 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
43189 break;
43190 case E_V4DFmode:
43191 if (i == 256)
43192 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
43193 else
43194 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
43195 break;
43196 case E_V32QImode:
43197 case E_V16HImode:
43198 case E_V8SImode:
43199 case E_V4DImode:
43200 if (i == 256)
43201 {
43202 if (GET_MODE (dest) != V4DImode)
43203 d = gen_reg_rtx (V4DImode);
43204 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
43205 gen_lowpart (V4DImode, src),
43206 const1_rtx);
43207 }
43208 else
43209 {
43210 d = gen_reg_rtx (V2TImode);
43211 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
43212 GEN_INT (i / 2));
43213 }
43214 break;
43215 case E_V64QImode:
43216 case E_V32HImode:
43217 case E_V16SImode:
43218 case E_V16SFmode:
43219 case E_V8DImode:
43220 case E_V8DFmode:
43221 if (i > 128)
43222 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
43223 gen_lowpart (V16SImode, src),
43224 gen_lowpart (V16SImode, src),
43225 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
43226 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
43227 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
43228 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
43229 GEN_INT (0xC), GEN_INT (0xD),
43230 GEN_INT (0xE), GEN_INT (0xF),
43231 GEN_INT (0x10), GEN_INT (0x11),
43232 GEN_INT (0x12), GEN_INT (0x13),
43233 GEN_INT (0x14), GEN_INT (0x15),
43234 GEN_INT (0x16), GEN_INT (0x17));
43235 else
43236 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
43237 gen_lowpart (V16SImode, src),
43238 GEN_INT (i == 128 ? 0x2 : 0x1),
43239 GEN_INT (0x3),
43240 GEN_INT (0x3),
43241 GEN_INT (0x3),
43242 GEN_INT (i == 128 ? 0x6 : 0x5),
43243 GEN_INT (0x7),
43244 GEN_INT (0x7),
43245 GEN_INT (0x7),
43246 GEN_INT (i == 128 ? 0xA : 0x9),
43247 GEN_INT (0xB),
43248 GEN_INT (0xB),
43249 GEN_INT (0xB),
43250 GEN_INT (i == 128 ? 0xE : 0xD),
43251 GEN_INT (0xF),
43252 GEN_INT (0xF),
43253 GEN_INT (0xF));
43254 break;
43255 default:
43256 gcc_unreachable ();
43257 }
43258 emit_insn (tem);
43259 if (d != dest)
43260 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
43261 }
43262
43263 /* Expand a vector reduction. FN is the binary pattern to reduce;
43264 DEST is the destination; IN is the input vector. */
43265
43266 void
43267 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
43268 {
43269 rtx half, dst, vec = in;
43270 machine_mode mode = GET_MODE (in);
43271 int i;
43272
43273 /* SSE4.1 has a special instruction (PHMINPOSUW) for V8HImode UMIN reduction. */
43274 if (TARGET_SSE4_1
43275 && mode == V8HImode
43276 && fn == gen_uminv8hi3)
43277 {
43278 emit_insn (gen_sse4_1_phminposuw (dest, in));
43279 return;
43280 }
43281
43282 for (i = GET_MODE_BITSIZE (mode);
43283 i > GET_MODE_UNIT_BITSIZE (mode);
43284 i >>= 1)
43285 {
43286 half = gen_reg_rtx (mode);
43287 emit_reduc_half (half, vec, i);
43288 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
43289 dst = dest;
43290 else
43291 dst = gen_reg_rtx (mode);
43292 emit_insn (fn (dst, half, vec));
43293 vec = dst;
43294 }
43295 }
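
/* Illustrative sketch (hypothetical values): reducing a V4SImode vector
   { a, b, c, d } with a min pattern takes two halving steps:

     step 1: min ({ a, b, c, d }, { c, d, _, _ })  ->  { m0, m1, _, _ }
     step 2: min ({ m0, m1, _, _ }, { m1, _, _, _ })  ->  { r, _, _, _ }

   after which the scalar result sits in element 0 of DEST; the remaining
   lanes are don't-cares.  */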
43296 \f
43297 /* Target hook for scalar_mode_supported_p. */
43298 static bool
43299 ix86_scalar_mode_supported_p (scalar_mode mode)
43300 {
43301 if (DECIMAL_FLOAT_MODE_P (mode))
43302 return default_decimal_float_supported_p ();
43303 else if (mode == TFmode)
43304 return true;
43305 else
43306 return default_scalar_mode_supported_p (mode);
43307 }
43308
43309 /* Implements target hook vector_mode_supported_p. */
43310 static bool
43311 ix86_vector_mode_supported_p (machine_mode mode)
43312 {
43313 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43314 return true;
43315 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43316 return true;
43317 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43318 return true;
43319 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43320 return true;
43321 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
43322 return true;
43323 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
43324 return true;
43325 return false;
43326 }
43327
43328 /* Target hook for c_mode_for_suffix. */
43329 static machine_mode
43330 ix86_c_mode_for_suffix (char suffix)
43331 {
43332 if (suffix == 'q')
43333 return TFmode;
43334 if (suffix == 'w')
43335 return XFmode;
43336
43337 return VOIDmode;
43338 }
43339
43340 /* Worker function for TARGET_MD_ASM_ADJUST.
43341
43342 We implement asm flag outputs, and maintain source compatibility
43343 with the old cc0-based compiler. */
43344
43345 static rtx_insn *
43346 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
43347 vec<const char *> &constraints,
43348 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
43349 {
43350 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
43351 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
43352
43353 bool saw_asm_flag = false;
43354
43355 start_sequence ();
43356 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
43357 {
43358 const char *con = constraints[i];
43359 if (strncmp (con, "=@cc", 4) != 0)
43360 continue;
43361 con += 4;
43362 if (strchr (con, ',') != NULL)
43363 {
43364 error ("alternatives not allowed in asm flag output");
43365 continue;
43366 }
43367
43368 bool invert = false;
43369 if (con[0] == 'n')
43370 invert = true, con++;
43371
43372 machine_mode mode = CCmode;
43373 rtx_code code = UNKNOWN;
43374
43375 switch (con[0])
43376 {
43377 case 'a':
43378 if (con[1] == 0)
43379 mode = CCAmode, code = EQ;
43380 else if (con[1] == 'e' && con[2] == 0)
43381 mode = CCCmode, code = NE;
43382 break;
43383 case 'b':
43384 if (con[1] == 0)
43385 mode = CCCmode, code = EQ;
43386 else if (con[1] == 'e' && con[2] == 0)
43387 mode = CCAmode, code = NE;
43388 break;
43389 case 'c':
43390 if (con[1] == 0)
43391 mode = CCCmode, code = EQ;
43392 break;
43393 case 'e':
43394 if (con[1] == 0)
43395 mode = CCZmode, code = EQ;
43396 break;
43397 case 'g':
43398 if (con[1] == 0)
43399 mode = CCGCmode, code = GT;
43400 else if (con[1] == 'e' && con[2] == 0)
43401 mode = CCGCmode, code = GE;
43402 break;
43403 case 'l':
43404 if (con[1] == 0)
43405 mode = CCGCmode, code = LT;
43406 else if (con[1] == 'e' && con[2] == 0)
43407 mode = CCGCmode, code = LE;
43408 break;
43409 case 'o':
43410 if (con[1] == 0)
43411 mode = CCOmode, code = EQ;
43412 break;
43413 case 'p':
43414 if (con[1] == 0)
43415 mode = CCPmode, code = EQ;
43416 break;
43417 case 's':
43418 if (con[1] == 0)
43419 mode = CCSmode, code = EQ;
43420 break;
43421 case 'z':
43422 if (con[1] == 0)
43423 mode = CCZmode, code = EQ;
43424 break;
43425 }
43426 if (code == UNKNOWN)
43427 {
43428 error ("unknown asm flag output %qs", constraints[i]);
43429 continue;
43430 }
43431 if (invert)
43432 code = reverse_condition (code);
43433
43434 rtx dest = outputs[i];
43435 if (!saw_asm_flag)
43436 {
43437 /* This is the first asm flag output. Here we put the flags
43438 register in as the real output and adjust the condition to
43439 allow it. */
43440 constraints[i] = "=Bf";
43441 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
43442 saw_asm_flag = true;
43443 }
43444 else
43445 {
43446 /* We don't need the flags register as output twice. */
43447 constraints[i] = "=X";
43448 outputs[i] = gen_rtx_SCRATCH (SImode);
43449 }
43450
43451 rtx x = gen_rtx_REG (mode, FLAGS_REG);
43452 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
43453
43454 machine_mode dest_mode = GET_MODE (dest);
43455 if (!SCALAR_INT_MODE_P (dest_mode))
43456 {
43457 error ("invalid type for asm flag output");
43458 continue;
43459 }
43460
43461 if (dest_mode == DImode && !TARGET_64BIT)
43462 dest_mode = SImode;
43463
43464 if (dest_mode != QImode)
43465 {
43466 rtx destqi = gen_reg_rtx (QImode);
43467 emit_insn (gen_rtx_SET (destqi, x));
43468
43469 if (TARGET_ZERO_EXTEND_WITH_AND
43470 && optimize_function_for_speed_p (cfun))
43471 {
43472 x = force_reg (dest_mode, const0_rtx);
43473
43474 emit_insn (gen_movstrictqi
43475 (gen_lowpart (QImode, x), destqi));
43476 }
43477 else
43478 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
43479 }
43480
43481 if (dest_mode != GET_MODE (dest))
43482 {
43483 rtx tmp = gen_reg_rtx (SImode);
43484
43485 emit_insn (gen_rtx_SET (tmp, x));
43486 emit_insn (gen_zero_extendsidi2 (dest, tmp));
43487 }
43488 else
43489 emit_insn (gen_rtx_SET (dest, x));
43490 }
43491 rtx_insn *seq = get_insns ();
43492 end_sequence ();
43493
43494 if (saw_asm_flag)
43495 return seq;
43496 else
43497 {
43498 /* If we had no asm flag outputs, clobber the flags. */
43499 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
43500 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
43501 return NULL;
43502 }
43503 }
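
/* Illustrative sketch of the asm flag-output feature handled above: a user
   can write, for instance,

     int carry;
     asm ("addl %2, %1" : "=@ccc" (carry), "+r" (x) : "r" (y));

   and CARRY receives 1 if the carry flag is set after the asm, 0 otherwise.
   The "=@ccc" constraint is rewritten here into a use of the flags register
   plus a setcc-style extraction into the user's output.  */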
43504
43505 /* Implements the target hook targetm.encode_section_info. */
43506
43507 static void ATTRIBUTE_UNUSED
43508 ix86_encode_section_info (tree decl, rtx rtl, int first)
43509 {
43510 default_encode_section_info (decl, rtl, first);
43511
43512 if (ix86_in_large_data_p (decl))
43513 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
43514 }
43515
43516 /* Worker function for REVERSE_CONDITION. */
43517
43518 enum rtx_code
43519 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
43520 {
43521 return (mode == CCFPmode
43522 ? reverse_condition_maybe_unordered (code)
43523 : reverse_condition (code));
43524 }
43525
43526 /* Output code to perform an x87 FP register move, from OPERANDS[1]
43527 to OPERANDS[0]. */
43528
43529 const char *
43530 output_387_reg_move (rtx_insn *insn, rtx *operands)
43531 {
43532 if (REG_P (operands[0]))
43533 {
43534 if (REG_P (operands[1])
43535 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
43536 {
43537 if (REGNO (operands[0]) == FIRST_STACK_REG)
43538 return output_387_ffreep (operands, 0);
43539 return "fstp\t%y0";
43540 }
43541 if (STACK_TOP_P (operands[0]))
43542 return "fld%Z1\t%y1";
43543 return "fst\t%y0";
43544 }
43545 else if (MEM_P (operands[0]))
43546 {
43547 gcc_assert (REG_P (operands[1]));
43548 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
43549 return "fstp%Z0\t%y0";
43550 else
43551 {
43552 /* There is no non-popping store to memory for XFmode.
43553 So if we need one, follow the store with a load. */
43554 if (GET_MODE (operands[0]) == XFmode)
43555 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
43556 else
43557 return "fst%Z0\t%y0";
43558 }
43559 }
43560 else
43561 gcc_unreachable();
43562 }
43563
43564 /* Output code to perform a conditional jump to LABEL, if the C2 flag
43565 in the FP status register is set. */
43566
43567 void
43568 ix86_emit_fp_unordered_jump (rtx label)
43569 {
43570 rtx reg = gen_reg_rtx (HImode);
43571 rtx temp;
43572
43573 emit_insn (gen_x86_fnstsw_1 (reg));
43574
43575 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
43576 {
43577 emit_insn (gen_x86_sahf_1 (reg));
43578
43579 temp = gen_rtx_REG (CCmode, FLAGS_REG);
43580 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
43581 }
43582 else
43583 {
43584 emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));
43585
43586 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
43587 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
43588 }
43589
43590 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
43591 gen_rtx_LABEL_REF (VOIDmode, label),
43592 pc_rtx);
43593 temp = gen_rtx_SET (pc_rtx, temp);
43594
43595 emit_jump_insn (temp);
43596 predict_jump (REG_BR_PROB_BASE * 10 / 100);
43597 }
43598
43599 /* Output code to perform a log1p XFmode calculation. */
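/* Sketch of the computation performed below (comment added for
   illustration; the fyl2xp1 range restriction is per the x87 ISA):
     if (fabs (op1) < 1 - sqrt (2) / 2)    i.e. < 0.29289321881...
       op0 = fyl2xp1 (op1, ln2);           y * log2 (x + 1), accurate for small |x|
     else
       op0 = fyl2x (1.0 + op1, ln2);       y * log2 (x)
   fldln2 supplies ln2, so scaling the base-2 logarithm by it yields the
   natural logarithm that log1p expects.  */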
43600
43601 void ix86_emit_i387_log1p (rtx op0, rtx op1)
43602 {
43603 rtx_code_label *label1 = gen_label_rtx ();
43604 rtx_code_label *label2 = gen_label_rtx ();
43605
43606 rtx tmp = gen_reg_rtx (XFmode);
43607 rtx tmp2 = gen_reg_rtx (XFmode);
43608 rtx test;
43609
43610 emit_insn (gen_absxf2 (tmp, op1));
43611 test = gen_rtx_GE (VOIDmode, tmp,
43612 const_double_from_real_value (
43613 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
43614 XFmode));
43615 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
43616
43617 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
43618 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
43619 emit_jump (label2);
43620
43621 emit_label (label1);
43622 emit_move_insn (tmp, CONST1_RTX (XFmode));
43623 emit_insn (gen_addxf3 (tmp, op1, tmp));
43624 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
43625 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
43626
43627 emit_label (label2);
43628 }
43629
43630 /* Emit code for round calculation. */
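/* In the style of the surrounding expanders, the C equivalent of the code
   emitted below is roughly (comment added for illustration):
     scratch = fxam (op1);                 classification; C1 holds the sign
     e2 = fabs (op1) + 0.5;
     res = floor (e2);                     via frndint / lfloor
     if (signbit (op1))
       res = -res;
     op0 = res;
   i.e. round (a) = sgn (a) * floor (fabs (a) + 0.5), as also noted inside
   the function.  */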
43631 void ix86_emit_i387_round (rtx op0, rtx op1)
43632 {
43633 machine_mode inmode = GET_MODE (op1);
43634 machine_mode outmode = GET_MODE (op0);
43635 rtx e1, e2, res, tmp, tmp1, half;
43636 rtx scratch = gen_reg_rtx (HImode);
43637 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
43638 rtx_code_label *jump_label = gen_label_rtx ();
43639 rtx insn;
43640 rtx (*gen_abs) (rtx, rtx);
43641 rtx (*gen_neg) (rtx, rtx);
43642
43643 switch (inmode)
43644 {
43645 case E_SFmode:
43646 gen_abs = gen_abssf2;
43647 break;
43648 case E_DFmode:
43649 gen_abs = gen_absdf2;
43650 break;
43651 case E_XFmode:
43652 gen_abs = gen_absxf2;
43653 break;
43654 default:
43655 gcc_unreachable ();
43656 }
43657
43658 switch (outmode)
43659 {
43660 case E_SFmode:
43661 gen_neg = gen_negsf2;
43662 break;
43663 case E_DFmode:
43664 gen_neg = gen_negdf2;
43665 break;
43666 case E_XFmode:
43667 gen_neg = gen_negxf2;
43668 break;
43669 case E_HImode:
43670 gen_neg = gen_neghi2;
43671 break;
43672 case E_SImode:
43673 gen_neg = gen_negsi2;
43674 break;
43675 case E_DImode:
43676 gen_neg = gen_negdi2;
43677 break;
43678 default:
43679 gcc_unreachable ();
43680 }
43681
43682 e1 = gen_reg_rtx (inmode);
43683 e2 = gen_reg_rtx (inmode);
43684 res = gen_reg_rtx (outmode);
43685
43686 half = const_double_from_real_value (dconsthalf, inmode);
43687
43688 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
43689
43690 /* scratch = fxam(op1) */
43691 emit_insn (gen_rtx_SET (scratch,
43692 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
43693 UNSPEC_FXAM)));
43694 /* e1 = fabs(op1) */
43695 emit_insn (gen_abs (e1, op1));
43696
43697 /* e2 = e1 + 0.5 */
43698 half = force_reg (inmode, half);
43699 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
43700
43701 /* res = floor(e2) */
43702 if (inmode != XFmode)
43703 {
43704 tmp1 = gen_reg_rtx (XFmode);
43705
43706 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
43707 }
43708 else
43709 tmp1 = e2;
43710
43711 switch (outmode)
43712 {
43713 case E_SFmode:
43714 case E_DFmode:
43715 {
43716 rtx tmp0 = gen_reg_rtx (XFmode);
43717
43718 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
43719
43720 emit_insn (gen_rtx_SET (res,
43721 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
43722 UNSPEC_TRUNC_NOOP)));
43723 }
43724 break;
43725 case E_XFmode:
43726 emit_insn (gen_frndintxf2_floor (res, tmp1));
43727 break;
43728 case E_HImode:
43729 emit_insn (gen_lfloorxfhi2 (res, tmp1));
43730 break;
43731 case E_SImode:
43732 emit_insn (gen_lfloorxfsi2 (res, tmp1));
43733 break;
43734 case E_DImode:
43735 emit_insn (gen_lfloorxfdi2 (res, tmp1));
43736 break;
43737 default:
43738 gcc_unreachable ();
43739 }
43740
43741 /* flags = signbit(a) */
43742 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
43743
43744 /* if (flags) then res = -res */
43745 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
43746 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
43747 gen_rtx_LABEL_REF (VOIDmode, jump_label),
43748 pc_rtx);
43749 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
43750 predict_jump (REG_BR_PROB_BASE * 50 / 100);
43751 JUMP_LABEL (insn) = jump_label;
43752
43753 emit_insn (gen_neg (res, res));
43754
43755 emit_label (jump_label);
43756 LABEL_NUSES (jump_label) = 1;
43757
43758 emit_move_insn (op0, res);
43759 }
43760
43761 /* Output code to perform a Newton-Raphson approximation of a single precision
43762 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
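/* The Newton-Raphson step below, written out (comment added for
   illustration):
     x0  = rcp (b);                 hardware estimate, roughly 12 bits
     x1  = x0 + x0 - b * x0 * x0;   = x0 * (2 - b * x0); the error is squared
     res = a * x1;
   which is exactly the e0/e1/x1 sequence emitted by this function.  */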
43763
43764 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
43765 {
43766 rtx x0, x1, e0, e1;
43767
43768 x0 = gen_reg_rtx (mode);
43769 e0 = gen_reg_rtx (mode);
43770 e1 = gen_reg_rtx (mode);
43771 x1 = gen_reg_rtx (mode);
43772
43773 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
43774
43775 b = force_reg (mode, b);
43776
43777 /* x0 = rcp(b) estimate */
43778 if (mode == V16SFmode || mode == V8DFmode)
43779 {
43780 if (TARGET_AVX512ER)
43781 {
43782 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43783 UNSPEC_RCP28)));
43784 /* res = a * x0 */
43785 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
43786 return;
43787 }
43788 else
43789 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43790 UNSPEC_RCP14)));
43791 }
43792 else
43793 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43794 UNSPEC_RCP)));
43795
43796 /* e0 = x0 * b */
43797 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
43798
43799 /* e0 = x0 * e0 */
43800 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
43801
43802 /* e1 = x0 + x0 */
43803 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
43804
43805 /* x1 = e1 - e0 */
43806 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
43807
43808 /* res = a * x1 */
43809 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
43810 }
43811
43812 /* Output code to perform a Newton-Raphson approximation of a
43813 single precision floating point [reciprocal] square root. */
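/* The Newton-Raphson step below, written out (comment added for
   illustration):
     x0 = rsqrt (a);                           hardware estimate
     rsqrt (a) ~ -0.5 * x0 * (a * x0 * x0 - 3.0)
               = x0 * (1.5 - 0.5 * a * x0 * x0)
     sqrt (a)  ~ -0.5 * (a * x0) * (a * x0 * x0 - 3.0)
   matching the e0/e1/e2/e3 sequence and the formulas quoted inside the
   function.  */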
43814
43815 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
43816 {
43817 rtx x0, e0, e1, e2, e3, mthree, mhalf;
43818 REAL_VALUE_TYPE r;
43819 int unspec;
43820
43821 x0 = gen_reg_rtx (mode);
43822 e0 = gen_reg_rtx (mode);
43823 e1 = gen_reg_rtx (mode);
43824 e2 = gen_reg_rtx (mode);
43825 e3 = gen_reg_rtx (mode);
43826
43827 if (TARGET_AVX512ER && mode == V16SFmode)
43828 {
43829 if (recip)
43830 /* res = rsqrt28(a) estimate */
43831 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43832 UNSPEC_RSQRT28)));
43833 else
43834 {
43835 /* x0 = rsqrt28(a) estimate */
43836 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43837 UNSPEC_RSQRT28)));
43838 /* res = rcp28(x0) estimate */
43839 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
43840 UNSPEC_RCP28)));
43841 }
43842 return;
43843 }
43844
43845 real_from_integer (&r, VOIDmode, -3, SIGNED);
43846 mthree = const_double_from_real_value (r, SFmode);
43847
43848 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
43849 mhalf = const_double_from_real_value (r, SFmode);
43850 unspec = UNSPEC_RSQRT;
43851
43852 if (VECTOR_MODE_P (mode))
43853 {
43854 mthree = ix86_build_const_vector (mode, true, mthree);
43855 mhalf = ix86_build_const_vector (mode, true, mhalf);
43856 /* There is no 512-bit rsqrt. There is, however, rsqrt14. */
43857 if (GET_MODE_SIZE (mode) == 64)
43858 unspec = UNSPEC_RSQRT14;
43859 }
43860
43861 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
43862 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
43863
43864 a = force_reg (mode, a);
43865
43866 /* x0 = rsqrt(a) estimate */
43867 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43868 unspec)));
43869
43870 /* If a == 0.0, filter out the infinite rsqrt result to prevent a NaN for sqrt(0.0). */
43871 if (!recip)
43872 {
43873 rtx zero = force_reg (mode, CONST0_RTX(mode));
43874 rtx mask;
43875
43876 /* Handle masked compare. */
43877 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
43878 {
43879 mask = gen_reg_rtx (HImode);
43880 /* Imm value 0x4 corresponds to not-equal comparison. */
43881 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
43882 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
43883 }
43884 else
43885 {
43886 mask = gen_reg_rtx (mode);
43887 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
43888 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
43889 }
43890 }
43891
43892 /* e0 = x0 * a */
43893 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
43894 /* e1 = e0 * x0 */
43895 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
43896
43897 /* e2 = e1 - 3. */
43898 mthree = force_reg (mode, mthree);
43899 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
43900
43901 mhalf = force_reg (mode, mhalf);
43902 if (recip)
43903 /* e3 = -.5 * x0 */
43904 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
43905 else
43906 /* e3 = -.5 * e0 */
43907 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
43908 /* ret = e2 * e3 */
43909 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
43910 }
43911
43912 #ifdef TARGET_SOLARIS
43913 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
43914
43915 static void
43916 i386_solaris_elf_named_section (const char *name, unsigned int flags,
43917 tree decl)
43918 {
43919 /* With Binutils 2.15, the "@unwind" marker must be specified on
43920 every occurrence of the ".eh_frame" section, not just the first
43921 one. */
43922 if (TARGET_64BIT
43923 && strcmp (name, ".eh_frame") == 0)
43924 {
43925 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
43926 flags & SECTION_WRITE ? "aw" : "a");
43927 return;
43928 }
43929
43930 #ifndef USE_GAS
43931 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
43932 {
43933 solaris_elf_asm_comdat_section (name, flags, decl);
43934 return;
43935 }
43936 #endif
43937
43938 default_elf_asm_named_section (name, flags, decl);
43939 }
43940 #endif /* TARGET_SOLARIS */
43941
43942 /* Return the mangling of TYPE if it is an extended fundamental type. */
43943
43944 static const char *
43945 ix86_mangle_type (const_tree type)
43946 {
43947 type = TYPE_MAIN_VARIANT (type);
43948
43949 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
43950 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
43951 return NULL;
43952
43953 switch (TYPE_MODE (type))
43954 {
43955 case E_TFmode:
43956 /* __float128 is "g". */
43957 return "g";
43958 case E_XFmode:
43959 /* "long double" or __float80 is "e". */
43960 return "e";
43961 default:
43962 return NULL;
43963 }
43964 }
43965
43966 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
43967
43968 static tree
43969 ix86_stack_protect_guard (void)
43970 {
43971 if (TARGET_SSP_TLS_GUARD)
43972 {
43973 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
43974 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
43975 tree type = build_qualified_type (type_node, qual);
43976 tree t;
43977
43978 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
43979 {
43980 t = ix86_tls_stack_chk_guard_decl;
43981
43982 if (t == NULL)
43983 {
43984 rtx x;
43985
43986 t = build_decl
43987 (UNKNOWN_LOCATION, VAR_DECL,
43988 get_identifier (ix86_stack_protector_guard_symbol_str),
43989 type);
43990 TREE_STATIC (t) = 1;
43991 TREE_PUBLIC (t) = 1;
43992 DECL_EXTERNAL (t) = 1;
43993 TREE_USED (t) = 1;
43994 TREE_THIS_VOLATILE (t) = 1;
43995 DECL_ARTIFICIAL (t) = 1;
43996 DECL_IGNORED_P (t) = 1;
43997
43998 /* Do not share RTL as the declaration is visible outside of
43999 current function. */
44000 x = DECL_RTL (t);
44001 RTX_FLAG (x, used) = 1;
44002
44003 ix86_tls_stack_chk_guard_decl = t;
44004 }
44005 }
44006 else
44007 {
44008 tree asptrtype = build_pointer_type (type);
44009
44010 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
44011 t = build2 (MEM_REF, asptrtype, t,
44012 build_int_cst (asptrtype, 0));
44013 }
44014
44015 return t;
44016 }
44017
44018 return default_stack_protect_guard ();
44019 }
44020
44021 /* For 32-bit code we can save PIC register setup by using the hidden
44022 __stack_chk_fail_local function instead of calling __stack_chk_fail
44023 directly. 64-bit code doesn't need to set up any PIC register, so it
44024 is better to call __stack_chk_fail directly. */
44025
44026 static tree ATTRIBUTE_UNUSED
44027 ix86_stack_protect_fail (void)
44028 {
44029 return TARGET_64BIT
44030 ? default_external_stack_protect_fail ()
44031 : default_hidden_stack_protect_fail ();
44032 }
44033
44034 /* Select a format to encode pointers in exception handling data. CODE
44035 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
44036 true if the symbol may be affected by dynamic relocations.
44037
44038 ??? All x86 object file formats are capable of representing this.
44039 After all, the relocation needed is the same as for the call insn.
44040 Whether or not a particular assembler allows us to emit such a
44041 relocation, I guess we'll have to see. */
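/* Illustrative examples of the resulting encodings (derived from the code
   below, not an exhaustive table):
     PIC, 32-bit or small code model:        DW_EH_PE_pcrel | DW_EH_PE_sdata4
                                             (plus DW_EH_PE_indirect if GLOBAL)
     PIC, 64-bit medium model, local data:   DW_EH_PE_pcrel | DW_EH_PE_sdata8
     non-PIC, small code model:              DW_EH_PE_udata4
     otherwise:                              DW_EH_PE_absptr  */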
44042 int
44043 asm_preferred_eh_data_format (int code, int global)
44044 {
44045 if (flag_pic)
44046 {
44047 int type = DW_EH_PE_sdata8;
44048 if (!TARGET_64BIT
44049 || ix86_cmodel == CM_SMALL_PIC
44050 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
44051 type = DW_EH_PE_sdata4;
44052 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
44053 }
44054 if (ix86_cmodel == CM_SMALL
44055 || (ix86_cmodel == CM_MEDIUM && code))
44056 return DW_EH_PE_udata4;
44057 return DW_EH_PE_absptr;
44058 }
44059 \f
44060 /* Expand copysign from SIGN to the positive value ABS_VALUE,
44061 storing in RESULT. If MASK is non-null, it is a mask for masking
44062 out the sign bit. */
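/* Stated as a formula (comment added for illustration):
     RESULT = ABS_VALUE | (SIGN & sign_bit_mask)
   where the sign-bit mask is built here when MASK is null, and is ~MASK
   otherwise (MASK, as produced by ix86_expand_sse_fabs, masks the sign
   bit out).  */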
44063 static void
44064 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
44065 {
44066 machine_mode mode = GET_MODE (sign);
44067 rtx sgn = gen_reg_rtx (mode);
44068 if (mask == NULL_RTX)
44069 {
44070 machine_mode vmode;
44071
44072 if (mode == SFmode)
44073 vmode = V4SFmode;
44074 else if (mode == DFmode)
44075 vmode = V2DFmode;
44076 else
44077 vmode = mode;
44078
44079 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
44080 if (!VECTOR_MODE_P (mode))
44081 {
44082 /* We need to generate a scalar mode mask in this case. */
44083 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
44084 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
44085 mask = gen_reg_rtx (mode);
44086 emit_insn (gen_rtx_SET (mask, tmp));
44087 }
44088 }
44089 else
44090 mask = gen_rtx_NOT (mode, mask);
44091 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
44092 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
44093 }
44094
44095 /* Expand fabs (OP0) and return a new rtx that holds the result. The
44096 mask for masking out the sign-bit is stored in *SMASK, if that is
44097 non-null. */
44098 static rtx
44099 ix86_expand_sse_fabs (rtx op0, rtx *smask)
44100 {
44101 machine_mode vmode, mode = GET_MODE (op0);
44102 rtx xa, mask;
44103
44104 xa = gen_reg_rtx (mode);
44105 if (mode == SFmode)
44106 vmode = V4SFmode;
44107 else if (mode == DFmode)
44108 vmode = V2DFmode;
44109 else
44110 vmode = mode;
44111 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
44112 if (!VECTOR_MODE_P (mode))
44113 {
44114 /* We need to generate a scalar mode mask in this case. */
44115 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
44116 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
44117 mask = gen_reg_rtx (mode);
44118 emit_insn (gen_rtx_SET (mask, tmp));
44119 }
44120 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
44121
44122 if (smask)
44123 *smask = mask;
44124
44125 return xa;
44126 }
44127
44128 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
44129 swapping the operands if SWAP_OPERANDS is true. The expanded
44130 code is a forward jump to a newly created label in case the
44131 comparison is true. The generated label rtx is returned. */
44132 static rtx_code_label *
44133 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
44134 bool swap_operands)
44135 {
44136 bool unordered_compare = ix86_unordered_fp_compare (code);
44137 rtx_code_label *label;
44138 rtx tmp, reg;
44139
44140 if (swap_operands)
44141 std::swap (op0, op1);
44142
44143 label = gen_label_rtx ();
44144 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
44145 if (unordered_compare)
44146 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
44147 reg = gen_rtx_REG (CCFPmode, FLAGS_REG);
44148 emit_insn (gen_rtx_SET (reg, tmp));
44149 tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx);
44150 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
44151 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
44152 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
44153 JUMP_LABEL (tmp) = label;
44154
44155 return label;
44156 }
44157
44158 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
44159 using comparison code CODE. Operands are swapped for the comparison if
44160 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
44161 static rtx
44162 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
44163 bool swap_operands)
44164 {
44165 rtx (*insn)(rtx, rtx, rtx, rtx);
44166 machine_mode mode = GET_MODE (op0);
44167 rtx mask = gen_reg_rtx (mode);
44168
44169 if (swap_operands)
44170 std::swap (op0, op1);
44171
44172 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
44173
44174 emit_insn (insn (mask, op0, op1,
44175 gen_rtx_fmt_ee (code, mode, op0, op1)));
44176 return mask;
44177 }
44178
44179 /* Generate and return a rtx of mode MODE for 2**n where n is the number
44180 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
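/* Illustration (comment added): TWO52 is 2^52 for DFmode and 2^23 for
   SFmode, the magnitude at which the unit in the last place becomes 1.0,
   so every representable value of at least that magnitude is already an
   integer.  Adding and then subtracting it from a value 0 <= xa < 2^n
   therefore rounds xa to an integer in the current rounding mode, which
   is the trick the expanders below rely on.  */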
44181 static rtx
44182 ix86_gen_TWO52 (machine_mode mode)
44183 {
44184 REAL_VALUE_TYPE TWO52r;
44185 rtx TWO52;
44186
44187 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
44188 TWO52 = const_double_from_real_value (TWO52r, mode);
44189 TWO52 = force_reg (mode, TWO52);
44190
44191 return TWO52;
44192 }
44193
44194 /* Expand SSE sequence for computing lround from OP1 storing
44195 into OP0. */
44196 void
44197 ix86_expand_lround (rtx op0, rtx op1)
44198 {
44199 /* C code for the stuff we're doing below:
44200 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
44201 return (long)tmp;
44202 */
44203 machine_mode mode = GET_MODE (op1);
44204 const struct real_format *fmt;
44205 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44206 rtx adj;
44207
44208 /* load nextafter (0.5, 0.0) */
44209 fmt = REAL_MODE_FORMAT (mode);
44210 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44211 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
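/* Comment added for clarity; the rationale is an inference, not stated in
   the original: pred_half is the largest representable value strictly
   below 0.5.  Using it instead of 0.5 keeps op1 + adj from rounding up
   across the next integer for inputs just below a half-way point, which
   would make lround too large by one.  */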
44212
44213 /* adj = copysign (0.5, op1) */
44214 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
44215 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
44216
44217 /* adj = op1 + adj */
44218 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
44219
44220 /* op0 = (imode)adj */
44221 expand_fix (op0, adj, 0);
44222 }
44223
44224 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
44225 storing into OPERAND0. */
44226 void
44227 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
44228 {
44229 /* C code for the stuff we're doing below (for do_floor):
44230 xi = (long)op1;
44231 xi -= (double)xi > op1 ? 1 : 0;
44232 return xi;
44233 */
44234 machine_mode fmode = GET_MODE (op1);
44235 machine_mode imode = GET_MODE (op0);
44236 rtx ireg, freg, tmp;
44237 rtx_code_label *label;
44238
44239 /* reg = (long)op1 */
44240 ireg = gen_reg_rtx (imode);
44241 expand_fix (ireg, op1, 0);
44242
44243 /* freg = (double)reg */
44244 freg = gen_reg_rtx (fmode);
44245 expand_float (freg, ireg, 0);
44246
44247 /* ireg = (freg > op1) ? ireg - 1 : ireg */
44248 label = ix86_expand_sse_compare_and_jump (UNLE,
44249 freg, op1, !do_floor);
44250 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
44251 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
44252 emit_move_insn (ireg, tmp);
44253
44254 emit_label (label);
44255 LABEL_NUSES (label) = 1;
44256
44257 emit_move_insn (op0, ireg);
44258 }
44259
44260 /* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */
44261 void
44262 ix86_expand_rint (rtx operand0, rtx operand1)
44263 {
44264 /* C code for the stuff we're doing below:
44265 xa = fabs (operand1);
44266 if (!isless (xa, 2**52))
44267 return operand1;
44268 two52 = 2**52;
44269 if (flag_rounding_math)
44270 {
44271 two52 = copysign (two52, operand1);
44272 xa = operand1;
44273 }
44274 xa = xa + two52 - two52;
44275 return copysign (xa, operand1);
44276 */
44277 machine_mode mode = GET_MODE (operand0);
44278 rtx res, xa, TWO52, two52, mask;
44279 rtx_code_label *label;
44280
44281 res = gen_reg_rtx (mode);
44282 emit_move_insn (res, operand1);
44283
44284 /* xa = abs (operand1) */
44285 xa = ix86_expand_sse_fabs (res, &mask);
44286
44287 /* if (!isless (xa, TWO52)) goto label; */
44288 TWO52 = ix86_gen_TWO52 (mode);
44289 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44290
44291 two52 = TWO52;
44292 if (flag_rounding_math)
44293 {
44294 two52 = gen_reg_rtx (mode);
44295 ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
44296 xa = res;
44297 }
44298
44299 xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
44300 xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
44301
44302 ix86_sse_copysign_to_positive (res, xa, res, mask);
44303
44304 emit_label (label);
44305 LABEL_NUSES (label) = 1;
44306
44307 emit_move_insn (operand0, res);
44308 }
44309
44310 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44311 into OPERAND0. */
44312 void
44313 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
44314 {
44315 /* C code for the stuff we expand below.
44316 double xa = fabs (x), x2;
44317 if (!isless (xa, TWO52))
44318 return x;
44319 xa = xa + TWO52 - TWO52;
44320 x2 = copysign (xa, x);
44321 Compensate. Floor:
44322 if (x2 > x)
44323 x2 -= 1;
44324 Compensate. Ceil:
44325 if (x2 < x)
44326 x2 -= -1;
44327 return x2;
44328 */
44329 machine_mode mode = GET_MODE (operand0);
44330 rtx xa, TWO52, tmp, one, res, mask;
44331 rtx_code_label *label;
44332
44333 TWO52 = ix86_gen_TWO52 (mode);
44334
44335 /* Temporary for holding the result, initialized to the input
44336 operand to ease control flow. */
44337 res = gen_reg_rtx (mode);
44338 emit_move_insn (res, operand1);
44339
44340 /* xa = abs (operand1) */
44341 xa = ix86_expand_sse_fabs (res, &mask);
44342
44343 /* if (!isless (xa, TWO52)) goto label; */
44344 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44345
44346 /* xa = xa + TWO52 - TWO52; */
44347 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44348 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
44349
44350 /* xa = copysign (xa, operand1) */
44351 ix86_sse_copysign_to_positive (xa, xa, res, mask);
44352
44353 /* generate 1.0 or -1.0 */
44354 one = force_reg (mode,
44355 const_double_from_real_value (do_floor
44356 ? dconst1 : dconstm1, mode));
44357
44358 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44359 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44360 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44361 /* We always need to subtract here to preserve signed zero. */
44362 tmp = expand_simple_binop (mode, MINUS,
44363 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44364 emit_move_insn (res, tmp);
44365
44366 emit_label (label);
44367 LABEL_NUSES (label) = 1;
44368
44369 emit_move_insn (operand0, res);
44370 }
44371
44372 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44373 into OPERAND0. */
44374 void
44375 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
44376 {
44377 /* C code for the stuff we expand below.
44378 double xa = fabs (x), x2;
44379 if (!isless (xa, TWO52))
44380 return x;
44381 x2 = (double)(long)x;
44382 Compensate. Floor:
44383 if (x2 > x)
44384 x2 -= 1;
44385 Compensate. Ceil:
44386 if (x2 < x)
44387 x2 += 1;
44388 if (HONOR_SIGNED_ZEROS (mode))
44389 return copysign (x2, x);
44390 return x2;
44391 */
44392 machine_mode mode = GET_MODE (operand0);
44393 rtx xa, xi, TWO52, tmp, one, res, mask;
44394 rtx_code_label *label;
44395
44396 TWO52 = ix86_gen_TWO52 (mode);
44397
44398 /* Temporary for holding the result, initialized to the input
44399 operand to ease control flow. */
44400 res = gen_reg_rtx (mode);
44401 emit_move_insn (res, operand1);
44402
44403 /* xa = abs (operand1) */
44404 xa = ix86_expand_sse_fabs (res, &mask);
44405
44406 /* if (!isless (xa, TWO52)) goto label; */
44407 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44408
44409 /* xa = (double)(long)x */
44410 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44411 expand_fix (xi, res, 0);
44412 expand_float (xa, xi, 0);
44413
44414 /* generate 1.0 */
44415 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
44416
44417 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44418 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44419 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44420 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
44421 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44422 emit_move_insn (res, tmp);
44423
44424 if (HONOR_SIGNED_ZEROS (mode))
44425 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
44426
44427 emit_label (label);
44428 LABEL_NUSES (label) = 1;
44429
44430 emit_move_insn (operand0, res);
44431 }
44432
44433 /* Expand SSE sequence for computing round from OPERAND1 storing
44434 into OPERAND0. Sequence that works without relying on DImode truncation
44435 via cvttsd2siq, which is only available on 64-bit targets. */
44436 void
44437 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
44438 {
44439 /* C code for the stuff we expand below.
44440 double xa = fabs (x), xa2, x2;
44441 if (!isless (xa, TWO52))
44442 return x;
44443 Using the absolute value and copying back sign makes
44444 -0.0 -> -0.0 correct.
44445 xa2 = xa + TWO52 - TWO52;
44446 Compensate.
44447 dxa = xa2 - xa;
44448 if (dxa <= -0.5)
44449 xa2 += 1;
44450 else if (dxa > 0.5)
44451 xa2 -= 1;
44452 x2 = copysign (xa2, x);
44453 return x2;
44454 */
44455 machine_mode mode = GET_MODE (operand0);
44456 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
44457 rtx_code_label *label;
44458
44459 TWO52 = ix86_gen_TWO52 (mode);
44460
44461 /* Temporary for holding the result, initialized to the input
44462 operand to ease control flow. */
44463 res = gen_reg_rtx (mode);
44464 emit_move_insn (res, operand1);
44465
44466 /* xa = abs (operand1) */
44467 xa = ix86_expand_sse_fabs (res, &mask);
44468
44469 /* if (!isless (xa, TWO52)) goto label; */
44470 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44471
44472 /* xa2 = xa + TWO52 - TWO52; */
44473 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44474 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
44475
44476 /* dxa = xa2 - xa; */
44477 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
44478
44479 /* generate 0.5, 1.0 and -0.5 */
44480 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
44481 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
44482 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
44483 0, OPTAB_DIRECT);
44484
44485 /* Compensate. */
44486 tmp = gen_reg_rtx (mode);
44487 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
44488 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
44489 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44490 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44491 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
44492 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
44493 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44494 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44495
44496 /* res = copysign (xa2, operand1) */
44497 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
44498
44499 emit_label (label);
44500 LABEL_NUSES (label) = 1;
44501
44502 emit_move_insn (operand0, res);
44503 }
44504
44505 /* Expand SSE sequence for computing trunc from OPERAND1 storing
44506 into OPERAND0. */
44507 void
44508 ix86_expand_trunc (rtx operand0, rtx operand1)
44509 {
44510 /* C code for SSE variant we expand below.
44511 double xa = fabs (x), x2;
44512 if (!isless (xa, TWO52))
44513 return x;
44514 x2 = (double)(long)x;
44515 if (HONOR_SIGNED_ZEROS (mode))
44516 return copysign (x2, x);
44517 return x2;
44518 */
44519 machine_mode mode = GET_MODE (operand0);
44520 rtx xa, xi, TWO52, res, mask;
44521 rtx_code_label *label;
44522
44523 TWO52 = ix86_gen_TWO52 (mode);
44524
44525 /* Temporary for holding the result, initialized to the input
44526 operand to ease control flow. */
44527 res = gen_reg_rtx (mode);
44528 emit_move_insn (res, operand1);
44529
44530 /* xa = abs (operand1) */
44531 xa = ix86_expand_sse_fabs (res, &mask);
44532
44533 /* if (!isless (xa, TWO52)) goto label; */
44534 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44535
44536 /* x = (double)(long)x */
44537 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44538 expand_fix (xi, res, 0);
44539 expand_float (res, xi, 0);
44540
44541 if (HONOR_SIGNED_ZEROS (mode))
44542 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
44543
44544 emit_label (label);
44545 LABEL_NUSES (label) = 1;
44546
44547 emit_move_insn (operand0, res);
44548 }
44549
44550 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
44551 OPERAND0, without relying on 64-bit-only DImode truncation (cvttsd2siq). */
44552 void
44553 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
44554 {
44555 machine_mode mode = GET_MODE (operand0);
44556 rtx xa, mask, TWO52, one, res, smask, tmp;
44557 rtx_code_label *label;
44558
44559 /* C code for SSE variant we expand below.
44560 double xa = fabs (x), x2;
44561 if (!isless (xa, TWO52))
44562 return x;
44563 xa2 = xa + TWO52 - TWO52;
44564 Compensate:
44565 if (xa2 > xa)
44566 xa2 -= 1.0;
44567 x2 = copysign (xa2, x);
44568 return x2;
44569 */
44570
44571 TWO52 = ix86_gen_TWO52 (mode);
44572
44573 /* Temporary for holding the result, initialized to the input
44574 operand to ease control flow. */
44575 res = gen_reg_rtx (mode);
44576 emit_move_insn (res, operand1);
44577
44578 /* xa = abs (operand1) */
44579 xa = ix86_expand_sse_fabs (res, &smask);
44580
44581 /* if (!isless (xa, TWO52)) goto label; */
44582 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44583
44584 /* res = xa + TWO52 - TWO52; */
44585 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44586 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
44587 emit_move_insn (res, tmp);
44588
44589 /* generate 1.0 */
44590 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
44591
44592 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
44593 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
44594 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
44595 tmp = expand_simple_binop (mode, MINUS,
44596 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
44597 emit_move_insn (res, tmp);
44598
44599 /* res = copysign (res, operand1) */
44600 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
44601
44602 emit_label (label);
44603 LABEL_NUSES (label) = 1;
44604
44605 emit_move_insn (operand0, res);
44606 }
44607
44608 /* Expand SSE sequence for computing round from OPERAND1 storing
44609 into OPERAND0. */
44610 void
44611 ix86_expand_round (rtx operand0, rtx operand1)
44612 {
44613 /* C code for the stuff we're doing below:
44614 double xa = fabs (x);
44615 if (!isless (xa, TWO52))
44616 return x;
44617 xa = (double)(long)(xa + nextafter (0.5, 0.0));
44618 return copysign (xa, x);
44619 */
44620 machine_mode mode = GET_MODE (operand0);
44621 rtx res, TWO52, xa, xi, half, mask;
44622 rtx_code_label *label;
44623 const struct real_format *fmt;
44624 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44625
44626 /* Temporary for holding the result, initialized to the input
44627 operand to ease control flow. */
44628 res = gen_reg_rtx (mode);
44629 emit_move_insn (res, operand1);
44630
44631 TWO52 = ix86_gen_TWO52 (mode);
44632 xa = ix86_expand_sse_fabs (res, &mask);
44633 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44634
44635 /* load nextafter (0.5, 0.0) */
44636 fmt = REAL_MODE_FORMAT (mode);
44637 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44638 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
44639
44640 /* xa = xa + 0.5 */
44641 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
44642 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
44643
44644 /* xa = (double)(int64_t)xa */
44645 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44646 expand_fix (xi, xa, 0);
44647 expand_float (xa, xi, 0);
44648
44649 /* res = copysign (xa, operand1) */
44650 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
44651
44652 emit_label (label);
44653 LABEL_NUSES (label) = 1;
44654
44655 emit_move_insn (operand0, res);
44656 }
44657
44658 /* Expand SSE sequence for computing round
44659 from OP1 storing into OP0 using sse4 round insn. */
44660 void
44661 ix86_expand_round_sse4 (rtx op0, rtx op1)
44662 {
44663 machine_mode mode = GET_MODE (op0);
44664 rtx e1, e2, res, half;
44665 const struct real_format *fmt;
44666 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44667 rtx (*gen_copysign) (rtx, rtx, rtx);
44668 rtx (*gen_round) (rtx, rtx, rtx);
44669
44670 switch (mode)
44671 {
44672 case E_SFmode:
44673 gen_copysign = gen_copysignsf3;
44674 gen_round = gen_sse4_1_roundsf2;
44675 break;
44676 case E_DFmode:
44677 gen_copysign = gen_copysigndf3;
44678 gen_round = gen_sse4_1_rounddf2;
44679 break;
44680 default:
44681 gcc_unreachable ();
44682 }
44683
44684 /* round (a) = trunc (a + copysign (0.5, a)) */
44685
44686 /* load nextafter (0.5, 0.0) */
44687 fmt = REAL_MODE_FORMAT (mode);
44688 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44689 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
44690 half = const_double_from_real_value (pred_half, mode);
44691
44692 /* e1 = copysign (0.5, op1) */
44693 e1 = gen_reg_rtx (mode);
44694 emit_insn (gen_copysign (e1, half, op1));
44695
44696 /* e2 = op1 + e1 */
44697 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
44698
44699 /* res = trunc (e2) */
44700 res = gen_reg_rtx (mode);
44701 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
44702
44703 emit_move_insn (op0, res);
44704 }
44705 \f
44706
44707 /* Table of valid machine attributes. */
44708 static const struct attribute_spec ix86_attribute_table[] =
44709 {
44710 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
44711 affects_type_identity, handler, exclude } */
44712 /* Stdcall attribute says callee is responsible for popping arguments
44713 if they are not variable. */
44714 { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
44715 NULL },
44716 /* Fastcall attribute says callee is responsible for popping arguments
44717 if they are not variable. */
44718 { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
44719 NULL },
44720 /* Thiscall attribute says callee is responsible for popping arguments
44721 if they are not variable. */
44722 { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
44723 NULL },
44724 /* Cdecl attribute says the callee is a normal C declaration */
44725 { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
44726 NULL },
44727 /* Regparm attribute specifies how many integer arguments are to be
44728 passed in registers. */
44729 { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute,
44730 NULL },
44731 /* Sseregparm attribute says we are using x86_64 calling conventions
44732 for FP arguments. */
44733 { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
44734 NULL },
44735 /* The transactional memory builtins are implicitly regparm or fastcall
44736 depending on the ABI. Override the generic do-nothing attribute that
44737 these builtins were declared with. */
44738 { "*tm regparm", 0, 0, false, true, true, true,
44739 ix86_handle_tm_regparm_attribute, NULL },
44740 /* force_align_arg_pointer says this function realigns the stack at entry. */
44741 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
44742 false, true, true, false, ix86_handle_force_align_arg_pointer_attribute,
44743 NULL },
44744 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
44745 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
44746 NULL },
44747 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
44748 NULL },
44749 { "shared", 0, 0, true, false, false, false,
44750 ix86_handle_shared_attribute, NULL },
44751 #endif
44752 { "ms_struct", 0, 0, false, false, false, false,
44753 ix86_handle_struct_attribute, NULL },
44754 { "gcc_struct", 0, 0, false, false, false, false,
44755 ix86_handle_struct_attribute, NULL },
44756 #ifdef SUBTARGET_ATTRIBUTE_TABLE
44757 SUBTARGET_ATTRIBUTE_TABLE,
44758 #endif
44759 /* ms_abi and sysv_abi calling convention function attributes. */
44760 { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL },
44761 { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute,
44762 NULL },
44763 { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
44764 { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
44765 { "ms_hook_prologue", 0, 0, true, false, false, false,
44766 ix86_handle_fndecl_attribute, NULL },
44767 { "callee_pop_aggregate_return", 1, 1, false, true, true, true,
44768 ix86_handle_callee_pop_aggregate_return, NULL },
44769 { "interrupt", 0, 0, false, true, true, false,
44770 ix86_handle_interrupt_attribute, NULL },
44771 { "no_caller_saved_registers", 0, 0, false, true, true, false,
44772 ix86_handle_no_caller_saved_registers_attribute, NULL },
44773 { "naked", 0, 0, true, false, false, false,
44774 ix86_handle_fndecl_attribute, NULL },
44775
44776 /* End element. */
44777 { NULL, 0, 0, false, false, false, false, NULL, NULL }
44778 };
44779
44780 /* Implement targetm.vectorize.builtin_vectorization_cost. */
44781 static int
44782 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
44783 tree vectype, int)
44784 {
44785 bool fp = false;
44786 machine_mode mode = TImode;
44787 int index;
44788 if (vectype != NULL)
44789 {
44790 fp = FLOAT_TYPE_P (vectype);
44791 mode = TYPE_MODE (vectype);
44792 }
44793
44794 switch (type_of_cost)
44795 {
44796 case scalar_stmt:
44797 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
44798
44799 case scalar_load:
44800 /* Load/store costs are relative to a register move, which is 2. Recompute
44801 them to COSTS_N_INSNS so everything has the same base. */
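/* Worked example with hypothetical numbers (comment added): if
   ix86_cost->int_load[2] were 4, i.e. twice the cost of a register move,
   COSTS_N_INSNS (4) / 2 gives the equivalent of two instructions, keeping
   the value comparable with the COSTS_N_INSNS (1) used for scalar_stmt
   above.  */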
44802 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
44803 : ix86_cost->int_load [2]) / 2;
44804
44805 case scalar_store:
44806 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
44807 : ix86_cost->int_store [2]) / 2;
44808
44809 case vector_stmt:
44810 return ix86_vec_cost (mode,
44811 fp ? ix86_cost->addss : ix86_cost->sse_op,
44812 true);
44813
44814 case vector_load:
44815 index = sse_store_index (mode);
44816 /* See PR82713 - we may end up being called on non-vector type. */
44817 if (index < 0)
44818 index = 2;
44819 return ix86_vec_cost (mode,
44820 COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
44821 true);
44822
44823 case vector_store:
44824 index = sse_store_index (mode);
44825 /* See PR82713 - we may end up being called on non-vector type. */
44826 if (index < 0)
44827 index = 2;
44828 return ix86_vec_cost (mode,
44829 COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
44830 true);
44831
44832 case vec_to_scalar:
44833 case scalar_to_vec:
44834 return ix86_vec_cost (mode, ix86_cost->sse_op, true);
44835
44836 /* We should have separate costs for unaligned loads and gather/scatter.
44837 Do that incrementally. */
44838 case unaligned_load:
44839 index = sse_store_index (mode);
44840 /* See PR82713 - we may end up being called on non-vector type. */
44841 if (index < 0)
44842 index = 2;
44843 return ix86_vec_cost (mode,
44844 COSTS_N_INSNS
44845 (ix86_cost->sse_unaligned_load[index]) / 2,
44846 true);
44847
44848 case unaligned_store:
44849 index = sse_store_index (mode);
44850 /* See PR82713 - we may end up being called on non-vector type. */
44851 if (index < 0)
44852 index = 2;
44853 return ix86_vec_cost (mode,
44854 COSTS_N_INSNS
44855 (ix86_cost->sse_unaligned_store[index]) / 2,
44856 true);
44857
44858 case vector_gather_load:
44859 return ix86_vec_cost (mode,
44860 COSTS_N_INSNS
44861 (ix86_cost->gather_static
44862 + ix86_cost->gather_per_elt
44863 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
44864 true);
44865
44866 case vector_scatter_store:
44867 return ix86_vec_cost (mode,
44868 COSTS_N_INSNS
44869 (ix86_cost->scatter_static
44870 + ix86_cost->scatter_per_elt
44871 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
44872 true);
44873
44874 case cond_branch_taken:
44875 return ix86_cost->cond_taken_branch_cost;
44876
44877 case cond_branch_not_taken:
44878 return ix86_cost->cond_not_taken_branch_cost;
44879
44880 case vec_perm:
44881 case vec_promote_demote:
44882 return ix86_vec_cost (mode,
44883 ix86_cost->sse_op, true);
44884
44885 case vec_construct:
44886 return ix86_vec_cost (mode, ix86_cost->sse_op, false);
44887
44888 default:
44889 gcc_unreachable ();
44890 }
44891 }
44892
44893 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
44894 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
44895 insn every time. */
44896
44897 static GTY(()) rtx_insn *vselect_insn;
44898
44899 /* Initialize vselect_insn. */
44900
44901 static void
44902 init_vselect_insn (void)
44903 {
44904 unsigned i;
44905 rtx x;
44906
44907 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
44908 for (i = 0; i < MAX_VECT_LEN; ++i)
44909 XVECEXP (x, 0, i) = const0_rtx;
44910 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
44911 const0_rtx), x);
44912 x = gen_rtx_SET (const0_rtx, x);
44913 start_sequence ();
44914 vselect_insn = emit_insn (x);
44915 end_sequence ();
44916 }
44917
44918 /* Construct (set target (vec_select op0 (parallel perm))) and
44919 return true if that's a valid instruction in the active ISA. */
44920
44921 static bool
44922 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
44923 unsigned nelt, bool testing_p)
44924 {
44925 unsigned int i;
44926 rtx x, save_vconcat;
44927 int icode;
44928
44929 if (vselect_insn == NULL_RTX)
44930 init_vselect_insn ();
44931
44932 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
44933 PUT_NUM_ELEM (XVEC (x, 0), nelt);
44934 for (i = 0; i < nelt; ++i)
44935 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
44936 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
44937 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
44938 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
44939 SET_DEST (PATTERN (vselect_insn)) = target;
44940 icode = recog_memoized (vselect_insn);
44941
44942 if (icode >= 0 && !testing_p)
44943 emit_insn (copy_rtx (PATTERN (vselect_insn)));
44944
44945 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
44946 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
44947 INSN_CODE (vselect_insn) = -1;
44948
44949 return icode >= 0;
44950 }
44951
44952 /* Similar, but generate a vec_concat from op0 and op1 as well. */
44953
44954 static bool
44955 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
44956 const unsigned char *perm, unsigned nelt,
44957 bool testing_p)
44958 {
44959 machine_mode v2mode;
44960 rtx x;
44961 bool ok;
44962
44963 if (vselect_insn == NULL_RTX)
44964 init_vselect_insn ();
44965
44966 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
44967 return false;
44968 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
44969 PUT_MODE (x, v2mode);
44970 XEXP (x, 0) = op0;
44971 XEXP (x, 1) = op1;
44972 ok = expand_vselect (target, x, perm, nelt, testing_p);
44973 XEXP (x, 0) = const0_rtx;
44974 XEXP (x, 1) = const0_rtx;
44975 return ok;
44976 }
44977
44978 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44979 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
44980
44981 static bool
44982 expand_vec_perm_blend (struct expand_vec_perm_d *d)
44983 {
44984 machine_mode mmode, vmode = d->vmode;
44985 unsigned i, mask, nelt = d->nelt;
44986 rtx target, op0, op1, maskop, x;
44987 rtx rperm[32], vperm;
44988
44989 if (d->one_operand_p)
44990 return false;
44991 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
44992 && (TARGET_AVX512BW
44993 || GET_MODE_UNIT_SIZE (vmode) >= 4))
44994 ;
44995 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
44996 ;
44997 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
44998 ;
44999 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
45000 ;
45001 else
45002 return false;
45003
45004 /* This is a blend, not a permute. Elements must stay in their
45005 respective lanes. */
45006 for (i = 0; i < nelt; ++i)
45007 {
45008 unsigned e = d->perm[i];
45009 if (!(e == i || e == i + nelt))
45010 return false;
45011 }
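/* Example (comment added for illustration): for nelt == 4, the permutation
   { 0, 5, 2, 7 } is a blend (element i always comes from position i of
   either operand, giving mask 0b1010 below), whereas { 1, 5, 2, 7 } is
   rejected here because element 0 would have to change position.  */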
45012
45013 if (d->testing_p)
45014 return true;
45015
45016 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
45017 decision should be extracted elsewhere, so that we only try that
45018 sequence once all budget==3 options have been tried. */
45019 target = d->target;
45020 op0 = d->op0;
45021 op1 = d->op1;
45022 mask = 0;
45023
45024 switch (vmode)
45025 {
45026 case E_V8DFmode:
45027 case E_V16SFmode:
45028 case E_V4DFmode:
45029 case E_V8SFmode:
45030 case E_V2DFmode:
45031 case E_V4SFmode:
45032 case E_V8HImode:
45033 case E_V8SImode:
45034 case E_V32HImode:
45035 case E_V64QImode:
45036 case E_V16SImode:
45037 case E_V8DImode:
45038 for (i = 0; i < nelt; ++i)
45039 mask |= (d->perm[i] >= nelt) << i;
45040 break;
45041
45042 case E_V2DImode:
45043 for (i = 0; i < 2; ++i)
45044 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
45045 vmode = V8HImode;
45046 goto do_subreg;
45047
45048 case E_V4SImode:
45049 for (i = 0; i < 4; ++i)
45050 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
45051 vmode = V8HImode;
45052 goto do_subreg;
45053
45054 case E_V16QImode:
45055 /* See if bytes move in pairs so we can use pblendw with
45056 an immediate argument, rather than pblendvb with a vector
45057 argument. */
45058 for (i = 0; i < 16; i += 2)
45059 if (d->perm[i] + 1 != d->perm[i + 1])
45060 {
45061 use_pblendvb:
45062 for (i = 0; i < nelt; ++i)
45063 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
45064
45065 finish_pblendvb:
45066 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
45067 vperm = force_reg (vmode, vperm);
45068
45069 if (GET_MODE_SIZE (vmode) == 16)
45070 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
45071 else
45072 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
45073 if (target != d->target)
45074 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45075 return true;
45076 }
45077
45078 for (i = 0; i < 8; ++i)
45079 mask |= (d->perm[i * 2] >= 16) << i;
45080 vmode = V8HImode;
45081 /* FALLTHRU */
45082
45083 do_subreg:
45084 target = gen_reg_rtx (vmode);
45085 op0 = gen_lowpart (vmode, op0);
45086 op1 = gen_lowpart (vmode, op1);
45087 break;
45088
45089 case E_V32QImode:
45090 /* See if bytes move in pairs. If not, vpblendvb must be used. */
45091 for (i = 0; i < 32; i += 2)
45092 if (d->perm[i] + 1 != d->perm[i + 1])
45093 goto use_pblendvb;
45094 /* See if bytes move in quadruplets. If yes, vpblendd
45095 with immediate can be used. */
45096 for (i = 0; i < 32; i += 4)
45097 if (d->perm[i] + 2 != d->perm[i + 2])
45098 break;
45099 if (i < 32)
45100 {
45101 /* See if bytes move the same in both lanes. If yes,
45102 vpblendw with immediate can be used. */
45103 for (i = 0; i < 16; i += 2)
45104 if (d->perm[i] + 16 != d->perm[i + 16])
45105 goto use_pblendvb;
45106
45107 /* Use vpblendw. */
45108 for (i = 0; i < 16; ++i)
45109 mask |= (d->perm[i * 2] >= 32) << i;
45110 vmode = V16HImode;
45111 goto do_subreg;
45112 }
45113
45114 /* Use vpblendd. */
45115 for (i = 0; i < 8; ++i)
45116 mask |= (d->perm[i * 4] >= 32) << i;
45117 vmode = V8SImode;
45118 goto do_subreg;
45119
45120 case E_V16HImode:
45121 /* See if words move in pairs. If yes, vpblendd can be used. */
45122 for (i = 0; i < 16; i += 2)
45123 if (d->perm[i] + 1 != d->perm[i + 1])
45124 break;
45125 if (i < 16)
45126 {
45127 /* See if words move the same in both lanes. If not,
45128 vpblendvb must be used. */
45129 for (i = 0; i < 8; i++)
45130 if (d->perm[i] + 8 != d->perm[i + 8])
45131 {
45132 /* Use vpblendvb. */
45133 for (i = 0; i < 32; ++i)
45134 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
45135
45136 vmode = V32QImode;
45137 nelt = 32;
45138 target = gen_reg_rtx (vmode);
45139 op0 = gen_lowpart (vmode, op0);
45140 op1 = gen_lowpart (vmode, op1);
45141 goto finish_pblendvb;
45142 }
45143
45144 /* Use vpblendw. */
45145 for (i = 0; i < 16; ++i)
45146 mask |= (d->perm[i] >= 16) << i;
45147 break;
45148 }
45149
45150 /* Use vpblendd. */
45151 for (i = 0; i < 8; ++i)
45152 mask |= (d->perm[i * 2] >= 16) << i;
45153 vmode = V8SImode;
45154 goto do_subreg;
45155
45156 case E_V4DImode:
45157 /* Use vpblendd. */
45158 for (i = 0; i < 4; ++i)
45159 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
45160 vmode = V8SImode;
45161 goto do_subreg;
45162
45163 default:
45164 gcc_unreachable ();
45165 }
45166
45167 switch (vmode)
45168 {
45169 case E_V8DFmode:
45170 case E_V8DImode:
45171 mmode = QImode;
45172 break;
45173 case E_V16SFmode:
45174 case E_V16SImode:
45175 mmode = HImode;
45176 break;
45177 case E_V32HImode:
45178 mmode = SImode;
45179 break;
45180 case E_V64QImode:
45181 mmode = DImode;
45182 break;
45183 default:
45184 mmode = VOIDmode;
45185 }
45186
45187 if (mmode != VOIDmode)
45188 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
45189 else
45190 maskop = GEN_INT (mask);
45191
45192 /* This matches five different patterns with the different modes. */
45193 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
45194 x = gen_rtx_SET (target, x);
45195 emit_insn (x);
45196 if (target != d->target)
45197 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45198
45199 return true;
45200 }
45201
45202 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45203 in terms of the variable form of vpermilps.
45204
45205 Note that we will have already failed the immediate input vpermilps,
45206 which requires that the high and low part shuffle be identical; the
45207 variable form doesn't require that. */
45208
45209 static bool
45210 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
45211 {
45212 rtx rperm[8], vperm;
45213 unsigned i;
45214
45215 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
45216 return false;
45217
45218 /* We can only permute within the 128-bit lane. */
45219 for (i = 0; i < 8; ++i)
45220 {
45221 unsigned e = d->perm[i];
45222 if (i < 4 ? e >= 4 : e < 4)
45223 return false;
45224 }
45225
45226 if (d->testing_p)
45227 return true;
45228
45229 for (i = 0; i < 8; ++i)
45230 {
45231 unsigned e = d->perm[i];
45232
45233 /* Within each 128-bit lane, the elements of op0 are numbered
45234 from 0 and the elements of op1 are numbered from 4. */
45235 if (e >= 8 + 4)
45236 e -= 8;
45237 else if (e >= 4)
45238 e -= 4;
45239
45240 rperm[i] = GEN_INT (e);
45241 }
45242
45243 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
45244 vperm = force_reg (V8SImode, vperm);
45245 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
45246
45247 return true;
45248 }
45249
45250 /* Return true if permutation D can be performed as VMODE permutation
45251 instead. */
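/* Example (comment added for illustration): the V16QImode permutation
   { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 } moves bytes in
   aligned groups of four, so it can be performed as the V4SImode
   permutation { 1, 0, 3, 2 } instead; any permutation that splits such a
   group fails the chunk test below.  */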
45252
45253 static bool
45254 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
45255 {
45256 unsigned int i, j, chunk;
45257
45258 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
45259 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
45260 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
45261 return false;
45262
45263 if (GET_MODE_NUNITS (vmode) >= d->nelt)
45264 return true;
45265
45266 chunk = d->nelt / GET_MODE_NUNITS (vmode);
45267 for (i = 0; i < d->nelt; i += chunk)
45268 if (d->perm[i] & (chunk - 1))
45269 return false;
45270 else
45271 for (j = 1; j < chunk; ++j)
45272 if (d->perm[i] + j != d->perm[i + j])
45273 return false;
45274
45275 return true;
45276 }
45277
45278 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45279 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
45280
45281 static bool
45282 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
45283 {
45284 unsigned i, nelt, eltsz, mask;
45285 unsigned char perm[64];
45286 machine_mode vmode = V16QImode;
45287 rtx rperm[64], vperm, target, op0, op1;
45288
45289 nelt = d->nelt;
45290
45291 if (!d->one_operand_p)
45292 {
45293 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
45294 {
45295 if (TARGET_AVX2
45296 && valid_perm_using_mode_p (V2TImode, d))
45297 {
45298 if (d->testing_p)
45299 return true;
45300
45301 /* Use vperm2i128 insn. The pattern uses
45302 V4DImode instead of V2TImode. */
45303 target = d->target;
45304 if (d->vmode != V4DImode)
45305 target = gen_reg_rtx (V4DImode);
45306 op0 = gen_lowpart (V4DImode, d->op0);
45307 op1 = gen_lowpart (V4DImode, d->op1);
45308 rperm[0]
45309 = GEN_INT ((d->perm[0] / (nelt / 2))
45310 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
45311 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
45312 if (target != d->target)
45313 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45314 return true;
45315 }
45316 return false;
45317 }
45318 }
45319 else
45320 {
45321 if (GET_MODE_SIZE (d->vmode) == 16)
45322 {
45323 if (!TARGET_SSSE3)
45324 return false;
45325 }
45326 else if (GET_MODE_SIZE (d->vmode) == 32)
45327 {
45328 if (!TARGET_AVX2)
45329 return false;
45330
45331 /* V4DImode should already have been handled through
45332 expand_vselect by the vpermq instruction. */
45333 gcc_assert (d->vmode != V4DImode);
45334
45335 vmode = V32QImode;
45336 if (d->vmode == V8SImode
45337 || d->vmode == V16HImode
45338 || d->vmode == V32QImode)
45339 {
45340 /* First see if vpermq can be used for
45341 V8SImode/V16HImode/V32QImode. */
45342 if (valid_perm_using_mode_p (V4DImode, d))
45343 {
45344 for (i = 0; i < 4; i++)
45345 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
45346 if (d->testing_p)
45347 return true;
45348 target = gen_reg_rtx (V4DImode);
45349 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
45350 perm, 4, false))
45351 {
45352 emit_move_insn (d->target,
45353 gen_lowpart (d->vmode, target));
45354 return true;
45355 }
45356 return false;
45357 }
45358
45359 /* Next see if vpermd can be used. */
45360 if (valid_perm_using_mode_p (V8SImode, d))
45361 vmode = V8SImode;
45362 }
45363 /* Or if vpermps can be used. */
45364 else if (d->vmode == V8SFmode)
45365 vmode = V8SImode;
45366
45367 if (vmode == V32QImode)
45368 {
45369 /* vpshufb only works intra lanes; it is not
45370 possible to shuffle bytes in between the lanes. */
45371 for (i = 0; i < nelt; ++i)
45372 if ((d->perm[i] ^ i) & (nelt / 2))
45373 return false;
45374 }
45375 }
45376 else if (GET_MODE_SIZE (d->vmode) == 64)
45377 {
45378 if (!TARGET_AVX512BW)
45379 return false;
45380
45381 /* If vpermq didn't work, vpshufb won't work either. */
45382 if (d->vmode == V8DFmode || d->vmode == V8DImode)
45383 return false;
45384
45385 vmode = V64QImode;
45386 if (d->vmode == V16SImode
45387 || d->vmode == V32HImode
45388 || d->vmode == V64QImode)
45389 {
45390 /* First see if vpermq can be used for
45391 V16SImode/V32HImode/V64QImode. */
45392 if (valid_perm_using_mode_p (V8DImode, d))
45393 {
45394 for (i = 0; i < 8; i++)
45395 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
45396 if (d->testing_p)
45397 return true;
45398 target = gen_reg_rtx (V8DImode);
45399 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
45400 perm, 8, false))
45401 {
45402 emit_move_insn (d->target,
45403 gen_lowpart (d->vmode, target));
45404 return true;
45405 }
45406 return false;
45407 }
45408
45409 /* Next see if vpermd can be used. */
45410 if (valid_perm_using_mode_p (V16SImode, d))
45411 vmode = V16SImode;
45412 }
45413 /* Or if vpermps can be used. */
45414 else if (d->vmode == V16SFmode)
45415 vmode = V16SImode;
45416 if (vmode == V64QImode)
45417 {
45418 /* vpshufb only works intra lanes; it is not
45419 possible to shuffle bytes in between the lanes. */
45420 for (i = 0; i < nelt; ++i)
45421 if ((d->perm[i] ^ i) & (nelt / 4))
45422 return false;
45423 }
45424 }
45425 else
45426 return false;
45427 }
45428
45429 if (d->testing_p)
45430 return true;
45431
45432 if (vmode == V8SImode)
45433 for (i = 0; i < 8; ++i)
45434 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
45435 else if (vmode == V16SImode)
45436 for (i = 0; i < 16; ++i)
45437 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
45438 else
45439 {
45440 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
45441 if (!d->one_operand_p)
45442 mask = 2 * nelt - 1;
45443 else if (vmode == V16QImode)
45444 mask = nelt - 1;
45445 else if (vmode == V64QImode)
45446 mask = nelt / 4 - 1;
45447 else
45448 mask = nelt / 2 - 1;
45449
45450 for (i = 0; i < nelt; ++i)
45451 {
45452 unsigned j, e = d->perm[i] & mask;
45453 for (j = 0; j < eltsz; ++j)
45454 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
45455 }
45456 }
45457
45458 vperm = gen_rtx_CONST_VECTOR (vmode,
45459 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
45460 vperm = force_reg (vmode, vperm);
45461
45462 target = d->target;
45463 if (d->vmode != vmode)
45464 target = gen_reg_rtx (vmode);
45465 op0 = gen_lowpart (vmode, d->op0);
45466 if (d->one_operand_p)
45467 {
45468 if (vmode == V16QImode)
45469 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
45470 else if (vmode == V32QImode)
45471 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
45472 else if (vmode == V64QImode)
45473 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
45474 else if (vmode == V8SFmode)
45475 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
45476 else if (vmode == V8SImode)
45477 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
45478 else if (vmode == V16SFmode)
45479 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
45480 else if (vmode == V16SImode)
45481 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
45482 else
45483 gcc_unreachable ();
45484 }
45485 else
45486 {
45487 op1 = gen_lowpart (vmode, d->op1);
45488 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
45489 }
45490 if (target != d->target)
45491 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45492
45493 return true;
45494 }
45495
45496 /* For V*[QHS]Imode permutations, check whether the same permutation
45497 can instead be performed in a 2x, 4x or 8x wider inner mode. */
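/* For instance, a V16QImode permutation such as
   {2,3, 0,1, 6,7, 4,5, 10,11, 8,9, 14,15, 12,13} moves whole aligned byte
   pairs, so it canonicalizes to the V8HImode permutation {1,0,3,2,5,4,7,6};
   the recursive step stops there because the halfword indexes no longer
   pair up.  */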
45498
45499 static bool
45500 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
45501 struct expand_vec_perm_d *nd)
45502 {
45503 int i;
45504 machine_mode mode = VOIDmode;
45505
45506 switch (d->vmode)
45507 {
45508 case E_V16QImode: mode = V8HImode; break;
45509 case E_V32QImode: mode = V16HImode; break;
45510 case E_V64QImode: mode = V32HImode; break;
45511 case E_V8HImode: mode = V4SImode; break;
45512 case E_V16HImode: mode = V8SImode; break;
45513 case E_V32HImode: mode = V16SImode; break;
45514 case E_V4SImode: mode = V2DImode; break;
45515 case E_V8SImode: mode = V4DImode; break;
45516 case E_V16SImode: mode = V8DImode; break;
45517 default: return false;
45518 }
45519 for (i = 0; i < d->nelt; i += 2)
45520 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
45521 return false;
45522 nd->vmode = mode;
45523 nd->nelt = d->nelt / 2;
45524 for (i = 0; i < nd->nelt; i++)
45525 nd->perm[i] = d->perm[2 * i] / 2;
45526 if (GET_MODE_INNER (mode) != DImode)
45527 canonicalize_vector_int_perm (nd, nd);
45528 if (nd != d)
45529 {
45530 nd->one_operand_p = d->one_operand_p;
45531 nd->testing_p = d->testing_p;
45532 if (d->op0 == d->op1)
45533 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
45534 else
45535 {
45536 nd->op0 = gen_lowpart (nd->vmode, d->op0);
45537 nd->op1 = gen_lowpart (nd->vmode, d->op1);
45538 }
45539 if (d->testing_p)
45540 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
45541 else
45542 nd->target = gen_reg_rtx (nd->vmode);
45543 }
45544 return true;
45545 }
45546
45547 /* Try to expand one-operand permutation with constant mask. */
45548
45549 static bool
45550 ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
45551 {
45552 machine_mode mode = GET_MODE (d->op0);
45553 machine_mode maskmode = mode;
45554 rtx (*gen) (rtx, rtx, rtx) = NULL;
45555 rtx target, op0, mask;
45556 rtx vec[64];
45557
45558 if (!rtx_equal_p (d->op0, d->op1))
45559 return false;
45560
45561 if (!TARGET_AVX512F)
45562 return false;
45563
45564 switch (mode)
45565 {
45566 case E_V16SImode:
45567 gen = gen_avx512f_permvarv16si;
45568 break;
45569 case E_V16SFmode:
45570 gen = gen_avx512f_permvarv16sf;
45571 maskmode = V16SImode;
45572 break;
45573 case E_V8DImode:
45574 gen = gen_avx512f_permvarv8di;
45575 break;
45576 case E_V8DFmode:
45577 gen = gen_avx512f_permvarv8df;
45578 maskmode = V8DImode;
45579 break;
45580 default:
45581 return false;
45582 }
45583
45584 target = d->target;
45585 op0 = d->op0;
45586 for (int i = 0; i < d->nelt; ++i)
45587 vec[i] = GEN_INT (d->perm[i]);
45588 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
45589 emit_insn (gen (target, op0, force_reg (maskmode, mask)));
45590 return true;
45591 }
45592
45593 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
45594 in a single instruction. */
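/* A small example of the matching done below: the one-operand V8HImode
   permutation {0,0,1,1,2,2,3,3} is neither an identity nor a broadcast and
   typically has no single-operand vec_select pattern, but rewriting every
   other index as coming from a second copy of op0 gives
   {0,8,1,9,2,10,3,11}, which expand_vselect_vconcat matches as punpcklwd
   of op0 with itself.  */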
45595
45596 static bool
45597 expand_vec_perm_1 (struct expand_vec_perm_d *d)
45598 {
45599 unsigned i, nelt = d->nelt;
45600 struct expand_vec_perm_d nd;
45601
45602 /* Check plain VEC_SELECT first, because AVX has instructions that could
45603 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
45604 input where SEL+CONCAT may not. */
45605 if (d->one_operand_p)
45606 {
45607 int mask = nelt - 1;
45608 bool identity_perm = true;
45609 bool broadcast_perm = true;
45610
45611 for (i = 0; i < nelt; i++)
45612 {
45613 nd.perm[i] = d->perm[i] & mask;
45614 if (nd.perm[i] != i)
45615 identity_perm = false;
45616 if (nd.perm[i])
45617 broadcast_perm = false;
45618 }
45619
45620 if (identity_perm)
45621 {
45622 if (!d->testing_p)
45623 emit_move_insn (d->target, d->op0);
45624 return true;
45625 }
45626 else if (broadcast_perm && TARGET_AVX2)
45627 {
45628 /* Use vpbroadcast{b,w,d}. */
45629 rtx (*gen) (rtx, rtx) = NULL;
45630 switch (d->vmode)
45631 {
45632 case E_V64QImode:
45633 if (TARGET_AVX512BW)
45634 gen = gen_avx512bw_vec_dupv64qi_1;
45635 break;
45636 case E_V32QImode:
45637 gen = gen_avx2_pbroadcastv32qi_1;
45638 break;
45639 case E_V32HImode:
45640 if (TARGET_AVX512BW)
45641 gen = gen_avx512bw_vec_dupv32hi_1;
45642 break;
45643 case E_V16HImode:
45644 gen = gen_avx2_pbroadcastv16hi_1;
45645 break;
45646 case E_V16SImode:
45647 if (TARGET_AVX512F)
45648 gen = gen_avx512f_vec_dupv16si_1;
45649 break;
45650 case E_V8SImode:
45651 gen = gen_avx2_pbroadcastv8si_1;
45652 break;
45653 case E_V16QImode:
45654 gen = gen_avx2_pbroadcastv16qi;
45655 break;
45656 case E_V8HImode:
45657 gen = gen_avx2_pbroadcastv8hi;
45658 break;
45659 case E_V16SFmode:
45660 if (TARGET_AVX512F)
45661 gen = gen_avx512f_vec_dupv16sf_1;
45662 break;
45663 case E_V8SFmode:
45664 gen = gen_avx2_vec_dupv8sf_1;
45665 break;
45666 case E_V8DFmode:
45667 if (TARGET_AVX512F)
45668 gen = gen_avx512f_vec_dupv8df_1;
45669 break;
45670 case E_V8DImode:
45671 if (TARGET_AVX512F)
45672 gen = gen_avx512f_vec_dupv8di_1;
45673 break;
45674 /* For other modes prefer other shuffles this function creates. */
45675 default: break;
45676 }
45677 if (gen != NULL)
45678 {
45679 if (!d->testing_p)
45680 emit_insn (gen (d->target, d->op0));
45681 return true;
45682 }
45683 }
45684
45685 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
45686 return true;
45687
45688 /* There are plenty of patterns in sse.md that are written for
45689 SEL+CONCAT and are not replicated for a single op. Perhaps
45690 that should be changed, to avoid the nastiness here. */
45691
45692 /* Recognize interleave style patterns, which means incrementing
45693 every other permutation operand. */
45694 for (i = 0; i < nelt; i += 2)
45695 {
45696 nd.perm[i] = d->perm[i] & mask;
45697 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
45698 }
45699 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
45700 d->testing_p))
45701 return true;
45702
45703 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
45704 if (nelt >= 4)
45705 {
45706 for (i = 0; i < nelt; i += 4)
45707 {
45708 nd.perm[i + 0] = d->perm[i + 0] & mask;
45709 nd.perm[i + 1] = d->perm[i + 1] & mask;
45710 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
45711 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
45712 }
45713
45714 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
45715 d->testing_p))
45716 return true;
45717 }
45718 }
45719
45720 /* Finally, try the fully general two operand permute. */
45721 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
45722 d->testing_p))
45723 return true;
45724
45725 /* Recognize interleave style patterns with reversed operands. */
45726 if (!d->one_operand_p)
45727 {
45728 for (i = 0; i < nelt; ++i)
45729 {
45730 unsigned e = d->perm[i];
45731 if (e >= nelt)
45732 e -= nelt;
45733 else
45734 e += nelt;
45735 nd.perm[i] = e;
45736 }
45737
45738 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
45739 d->testing_p))
45740 return true;
45741 }
45742
45743 /* Try the SSE4.1 blend variable merge instructions. */
45744 if (expand_vec_perm_blend (d))
45745 return true;
45746
45747 /* Try one of the AVX vpermil variable permutations. */
45748 if (expand_vec_perm_vpermil (d))
45749 return true;
45750
45751 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
45752 vpshufb, vpermd, vpermps or vpermq variable permutation. */
45753 if (expand_vec_perm_pshufb (d))
45754 return true;
45755
45756 /* Try the AVX2 vpalignr instruction. */
45757 if (expand_vec_perm_palignr (d, true))
45758 return true;
45759
45760 /* Try the AVX512F vperm{s,d} instructions. */
45761 if (ix86_expand_vec_one_operand_perm_avx512 (d))
45762 return true;
45763
45764 /* Try the AVX512F vpermt2/vpermi2 instructions. */
45765 if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
45766 return true;
45767
45768 /* See if we can get the same permutation in a different vector
45769 integer mode. */
45770 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
45771 {
45772 if (!d->testing_p)
45773 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
45774 return true;
45775 }
45776 return false;
45777 }
45778
45779 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45780 in terms of a pair of pshuflw + pshufhw instructions. */
45781
45782 static bool
45783 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
45784 {
45785 unsigned char perm2[MAX_VECT_LEN];
45786 unsigned i;
45787 bool ok;
45788
45789 if (d->vmode != V8HImode || !d->one_operand_p)
45790 return false;
45791
45792 /* The two permutations only operate in 64-bit lanes. */
45793 for (i = 0; i < 4; ++i)
45794 if (d->perm[i] >= 4)
45795 return false;
45796 for (i = 4; i < 8; ++i)
45797 if (d->perm[i] < 4)
45798 return false;
45799
45800 if (d->testing_p)
45801 return true;
45802
45803 /* Emit the pshuflw. */
45804 memcpy (perm2, d->perm, 4);
45805 for (i = 4; i < 8; ++i)
45806 perm2[i] = i;
45807 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
45808 gcc_assert (ok);
45809
45810 /* Emit the pshufhw. */
45811 memcpy (perm2 + 4, d->perm + 4, 4);
45812 for (i = 0; i < 4; ++i)
45813 perm2[i] = i;
45814 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
45815 gcc_assert (ok);
45816
45817 return true;
45818 }
45819
45820 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45821 the permutation using the SSSE3 palignr instruction. This succeeds
45822 when all of the elements in PERM fit within one vector and we merely
45823 need to shift them down so that a single vector permutation has a
45824 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
45825 the vpalignr instruction itself can perform the requested permutation. */
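/* For instance, a two-operand V16QImode permutation {3,4,...,18}, i.e.
   sixteen consecutive byte indexes where 0-15 name op0 and 16-31 name op1,
   has min = 3 and max = 18, so a single palignr with a 3-byte shift
   already yields the elements in order and no follow-up shuffle is
   needed.  */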
45826
45827 static bool
45828 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
45829 {
45830 unsigned i, nelt = d->nelt;
45831 unsigned min, max, minswap, maxswap;
45832 bool in_order, ok, swap = false;
45833 rtx shift, target;
45834 struct expand_vec_perm_d dcopy;
45835
45836 /* Even with AVX, palignr only operates on 128-bit vectors;
45837 with AVX2, palignr operates within both 128-bit lanes. */
45838 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
45839 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
45840 return false;
45841
45842 min = 2 * nelt;
45843 max = 0;
45844 minswap = 2 * nelt;
45845 maxswap = 0;
45846 for (i = 0; i < nelt; ++i)
45847 {
45848 unsigned e = d->perm[i];
45849 unsigned eswap = d->perm[i] ^ nelt;
45850 if (GET_MODE_SIZE (d->vmode) == 32)
45851 {
45852 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
45853 eswap = e ^ (nelt / 2);
45854 }
45855 if (e < min)
45856 min = e;
45857 if (e > max)
45858 max = e;
45859 if (eswap < minswap)
45860 minswap = eswap;
45861 if (eswap > maxswap)
45862 maxswap = eswap;
45863 }
45864 if (min == 0
45865 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
45866 {
45867 if (d->one_operand_p
45868 || minswap == 0
45869 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
45870 ? nelt / 2 : nelt))
45871 return false;
45872 swap = true;
45873 min = minswap;
45874 max = maxswap;
45875 }
45876
45877 /* Given that we have SSSE3, we know we'll be able to implement the
45878 single operand permutation after the palignr with pshufb for
45879 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
45880 first. */
45881 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
45882 return true;
45883
45884 dcopy = *d;
45885 if (swap)
45886 {
45887 dcopy.op0 = d->op1;
45888 dcopy.op1 = d->op0;
45889 for (i = 0; i < nelt; ++i)
45890 dcopy.perm[i] ^= nelt;
45891 }
45892
45893 in_order = true;
45894 for (i = 0; i < nelt; ++i)
45895 {
45896 unsigned e = dcopy.perm[i];
45897 if (GET_MODE_SIZE (d->vmode) == 32
45898 && e >= nelt
45899 && (e & (nelt / 2 - 1)) < min)
45900 e = e - min - (nelt / 2);
45901 else
45902 e = e - min;
45903 if (e != i)
45904 in_order = false;
45905 dcopy.perm[i] = e;
45906 }
45907 dcopy.one_operand_p = true;
45908
45909 if (single_insn_only_p && !in_order)
45910 return false;
45911
45912 /* For AVX2, test whether we can permute the result in one instruction. */
45913 if (d->testing_p)
45914 {
45915 if (in_order)
45916 return true;
45917 dcopy.op1 = dcopy.op0;
45918 return expand_vec_perm_1 (&dcopy);
45919 }
45920
45921 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
45922 if (GET_MODE_SIZE (d->vmode) == 16)
45923 {
45924 target = gen_reg_rtx (TImode);
45925 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
45926 gen_lowpart (TImode, dcopy.op0), shift));
45927 }
45928 else
45929 {
45930 target = gen_reg_rtx (V2TImode);
45931 emit_insn (gen_avx2_palignrv2ti (target,
45932 gen_lowpart (V2TImode, dcopy.op1),
45933 gen_lowpart (V2TImode, dcopy.op0),
45934 shift));
45935 }
45936
45937 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
45938
45939 /* Test for the degenerate case where the alignment by itself
45940 produces the desired permutation. */
45941 if (in_order)
45942 {
45943 emit_move_insn (d->target, dcopy.op0);
45944 return true;
45945 }
45946
45947 ok = expand_vec_perm_1 (&dcopy);
45948 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
45949
45950 return ok;
45951 }
45952
45953 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
45954 the permutation using the SSE4_1 pblendv instruction. Potentially
45955 reduces the permutation from 2 pshufb insns and an or to 1 pshufb and 1 pblendv. */
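/* Taking the V8SImode permutation {0,1,8,3,4,5,9,7} as an example: only
   elements 8 and 9, both from op1, are out of place, so op1 is first
   permuted on its own with {0,1,0,3,4,5,1,7} (the indexes masked to one
   operand), and the result is then blended into op0 at positions 2
   and 6.  */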
45956
45957 static bool
45958 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
45959 {
45960 unsigned i, which, nelt = d->nelt;
45961 struct expand_vec_perm_d dcopy, dcopy1;
45962 machine_mode vmode = d->vmode;
45963 bool ok;
45964
45965 /* Use the same checks as in expand_vec_perm_blend. */
45966 if (d->one_operand_p)
45967 return false;
45968 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
45969 ;
45970 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
45971 ;
45972 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
45973 ;
45974 else
45975 return false;
45976
45977 /* Figure out which permutation elements do not stay in their
45978 respective lanes. */
45979 for (i = 0, which = 0; i < nelt; ++i)
45980 {
45981 unsigned e = d->perm[i];
45982 if (e != i)
45983 which |= (e < nelt ? 1 : 2);
45984 }
45985 /* We can pblend the part where elements do not stay in their
45986 respective lanes only when these elements all come from one
45987 half of the permutation.
45988 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
45989 lanes, but both are >= 8.
45990 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
45991 respective lanes, and 8 is >= 8 but 2 is not. */
45992 if (which != 1 && which != 2)
45993 return false;
45994 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
45995 return true;
45996
45997 /* First we apply a one-operand permutation to the part whose
45998 elements do not stay in their respective lanes. */
45999 dcopy = *d;
46000 if (which == 2)
46001 dcopy.op0 = dcopy.op1 = d->op1;
46002 else
46003 dcopy.op0 = dcopy.op1 = d->op0;
46004 if (!d->testing_p)
46005 dcopy.target = gen_reg_rtx (vmode);
46006 dcopy.one_operand_p = true;
46007
46008 for (i = 0; i < nelt; ++i)
46009 dcopy.perm[i] = d->perm[i] & (nelt - 1);
46010
46011 ok = expand_vec_perm_1 (&dcopy);
46012 if (GET_MODE_SIZE (vmode) != 16 && !ok)
46013 return false;
46014 else
46015 gcc_assert (ok);
46016 if (d->testing_p)
46017 return true;
46018
46019 /* Next we put permuted elements into their positions. */
46020 dcopy1 = *d;
46021 if (which == 2)
46022 dcopy1.op1 = dcopy.target;
46023 else
46024 dcopy1.op0 = dcopy.target;
46025
46026 for (i = 0; i < nelt; ++i)
46027 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
46028
46029 ok = expand_vec_perm_blend (&dcopy1);
46030 gcc_assert (ok);
46031
46032 return true;
46033 }
46034
46035 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
46036
46037 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46038 a two vector permutation into a single vector permutation by using
46039 an interleave operation to merge the vectors. */
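/* As an illustration of the 16-byte case: for a V4SImode permutation
   {1,5,0,4} all elements come from the low halves of the two inputs, so a
   punpckldq first produces {0,4,1,5}; the remap table then turns the
   original indexes into {2,3,0,1}, a single pshufd on the interleaved
   vector.  */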
46040
46041 static bool
46042 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
46043 {
46044 struct expand_vec_perm_d dremap, dfinal;
46045 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
46046 unsigned HOST_WIDE_INT contents;
46047 unsigned char remap[2 * MAX_VECT_LEN];
46048 rtx_insn *seq;
46049 bool ok, same_halves = false;
46050
46051 if (GET_MODE_SIZE (d->vmode) == 16)
46052 {
46053 if (d->one_operand_p)
46054 return false;
46055 }
46056 else if (GET_MODE_SIZE (d->vmode) == 32)
46057 {
46058 if (!TARGET_AVX)
46059 return false;
46060 /* For 32-byte modes allow even d->one_operand_p.
46061 The lack of cross-lane shuffling in some instructions
46062 might prevent a single insn shuffle. */
46063 dfinal = *d;
46064 dfinal.testing_p = true;
46065 /* If expand_vec_perm_interleave3 can expand this into
46066 a 3 insn sequence, give up and let it be expanded as
46067 a 3 insn sequence. While that is one insn longer,
46068 it doesn't need a memory operand, and in the common
46069 case where the interleave low and high permutations
46070 with the same operands are adjacent, the pair needs
46071 only 4 insns after CSE. */
46072 if (expand_vec_perm_interleave3 (&dfinal))
46073 return false;
46074 }
46075 else
46076 return false;
46077
46078 /* Examine from whence the elements come. */
46079 contents = 0;
46080 for (i = 0; i < nelt; ++i)
46081 contents |= HOST_WIDE_INT_1U << d->perm[i];
46082
46083 memset (remap, 0xff, sizeof (remap));
46084 dremap = *d;
46085
46086 if (GET_MODE_SIZE (d->vmode) == 16)
46087 {
46088 unsigned HOST_WIDE_INT h1, h2, h3, h4;
46089
46090 /* Split the two input vectors into 4 halves. */
46091 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
46092 h2 = h1 << nelt2;
46093 h3 = h2 << nelt2;
46094 h4 = h3 << nelt2;
46095
46096 /* If the elements all come from the low halves, use interleave low;
46097 similarly for interleave high. If the elements are from mis-matched
46098 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
46099 if ((contents & (h1 | h3)) == contents)
46100 {
46101 /* punpckl* */
46102 for (i = 0; i < nelt2; ++i)
46103 {
46104 remap[i] = i * 2;
46105 remap[i + nelt] = i * 2 + 1;
46106 dremap.perm[i * 2] = i;
46107 dremap.perm[i * 2 + 1] = i + nelt;
46108 }
46109 if (!TARGET_SSE2 && d->vmode == V4SImode)
46110 dremap.vmode = V4SFmode;
46111 }
46112 else if ((contents & (h2 | h4)) == contents)
46113 {
46114 /* punpckh* */
46115 for (i = 0; i < nelt2; ++i)
46116 {
46117 remap[i + nelt2] = i * 2;
46118 remap[i + nelt + nelt2] = i * 2 + 1;
46119 dremap.perm[i * 2] = i + nelt2;
46120 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
46121 }
46122 if (!TARGET_SSE2 && d->vmode == V4SImode)
46123 dremap.vmode = V4SFmode;
46124 }
46125 else if ((contents & (h1 | h4)) == contents)
46126 {
46127 /* shufps */
46128 for (i = 0; i < nelt2; ++i)
46129 {
46130 remap[i] = i;
46131 remap[i + nelt + nelt2] = i + nelt2;
46132 dremap.perm[i] = i;
46133 dremap.perm[i + nelt2] = i + nelt + nelt2;
46134 }
46135 if (nelt != 4)
46136 {
46137 /* shufpd */
46138 dremap.vmode = V2DImode;
46139 dremap.nelt = 2;
46140 dremap.perm[0] = 0;
46141 dremap.perm[1] = 3;
46142 }
46143 }
46144 else if ((contents & (h2 | h3)) == contents)
46145 {
46146 /* shufps */
46147 for (i = 0; i < nelt2; ++i)
46148 {
46149 remap[i + nelt2] = i;
46150 remap[i + nelt] = i + nelt2;
46151 dremap.perm[i] = i + nelt2;
46152 dremap.perm[i + nelt2] = i + nelt;
46153 }
46154 if (nelt != 4)
46155 {
46156 /* shufpd */
46157 dremap.vmode = V2DImode;
46158 dremap.nelt = 2;
46159 dremap.perm[0] = 1;
46160 dremap.perm[1] = 2;
46161 }
46162 }
46163 else
46164 return false;
46165 }
46166 else
46167 {
46168 unsigned int nelt4 = nelt / 4, nzcnt = 0;
46169 unsigned HOST_WIDE_INT q[8];
46170 unsigned int nonzero_halves[4];
46171
46172 /* Split the two input vectors into 8 quarters. */
46173 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
46174 for (i = 1; i < 8; ++i)
46175 q[i] = q[0] << (nelt4 * i);
46176 for (i = 0; i < 4; ++i)
46177 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
46178 {
46179 nonzero_halves[nzcnt] = i;
46180 ++nzcnt;
46181 }
46182
46183 if (nzcnt == 1)
46184 {
46185 gcc_assert (d->one_operand_p);
46186 nonzero_halves[1] = nonzero_halves[0];
46187 same_halves = true;
46188 }
46189 else if (d->one_operand_p)
46190 {
46191 gcc_assert (nonzero_halves[0] == 0);
46192 gcc_assert (nonzero_halves[1] == 1);
46193 }
46194
46195 if (nzcnt <= 2)
46196 {
46197 if (d->perm[0] / nelt2 == nonzero_halves[1])
46198 {
46199 /* Attempt to increase the likelihood that dfinal
46200 shuffle will be intra-lane. */
46201 std::swap (nonzero_halves[0], nonzero_halves[1]);
46202 }
46203
46204 /* vperm2f128 or vperm2i128. */
46205 for (i = 0; i < nelt2; ++i)
46206 {
46207 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
46208 remap[i + nonzero_halves[0] * nelt2] = i;
46209 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
46210 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
46211 }
46212
46213 if (d->vmode != V8SFmode
46214 && d->vmode != V4DFmode
46215 && d->vmode != V8SImode)
46216 {
46217 dremap.vmode = V8SImode;
46218 dremap.nelt = 8;
46219 for (i = 0; i < 4; ++i)
46220 {
46221 dremap.perm[i] = i + nonzero_halves[0] * 4;
46222 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
46223 }
46224 }
46225 }
46226 else if (d->one_operand_p)
46227 return false;
46228 else if (TARGET_AVX2
46229 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
46230 {
46231 /* vpunpckl* */
46232 for (i = 0; i < nelt4; ++i)
46233 {
46234 remap[i] = i * 2;
46235 remap[i + nelt] = i * 2 + 1;
46236 remap[i + nelt2] = i * 2 + nelt2;
46237 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
46238 dremap.perm[i * 2] = i;
46239 dremap.perm[i * 2 + 1] = i + nelt;
46240 dremap.perm[i * 2 + nelt2] = i + nelt2;
46241 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
46242 }
46243 }
46244 else if (TARGET_AVX2
46245 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
46246 {
46247 /* vpunpckh* */
46248 for (i = 0; i < nelt4; ++i)
46249 {
46250 remap[i + nelt4] = i * 2;
46251 remap[i + nelt + nelt4] = i * 2 + 1;
46252 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
46253 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
46254 dremap.perm[i * 2] = i + nelt4;
46255 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
46256 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
46257 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
46258 }
46259 }
46260 else
46261 return false;
46262 }
46263
46264 /* Use the remapping array set up above to move the elements from their
46265 swizzled locations into their final destinations. */
46266 dfinal = *d;
46267 for (i = 0; i < nelt; ++i)
46268 {
46269 unsigned e = remap[d->perm[i]];
46270 gcc_assert (e < nelt);
46271 /* If same_halves is true, both halves of the remapped vector are the
46272 same. Avoid cross-lane accesses if possible. */
46273 if (same_halves && i >= nelt2)
46274 {
46275 gcc_assert (e < nelt2);
46276 dfinal.perm[i] = e + nelt2;
46277 }
46278 else
46279 dfinal.perm[i] = e;
46280 }
46281 if (!d->testing_p)
46282 {
46283 dremap.target = gen_reg_rtx (dremap.vmode);
46284 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
46285 }
46286 dfinal.op1 = dfinal.op0;
46287 dfinal.one_operand_p = true;
46288
46289 /* Test if the final remap can be done with a single insn. For V4SFmode or
46290 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
46291 start_sequence ();
46292 ok = expand_vec_perm_1 (&dfinal);
46293 seq = get_insns ();
46294 end_sequence ();
46295
46296 if (!ok)
46297 return false;
46298
46299 if (d->testing_p)
46300 return true;
46301
46302 if (dremap.vmode != dfinal.vmode)
46303 {
46304 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
46305 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
46306 }
46307
46308 ok = expand_vec_perm_1 (&dremap);
46309 gcc_assert (ok);
46310
46311 emit_insn (seq);
46312 return true;
46313 }
46314
46315 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46316 a single vector cross-lane permutation into vpermq followed
46317 by any of the single insn permutations. */
46318
46319 static bool
46320 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
46321 {
46322 struct expand_vec_perm_d dremap, dfinal;
46323 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
46324 unsigned contents[2];
46325 bool ok;
46326
46327 if (!(TARGET_AVX2
46328 && (d->vmode == V32QImode || d->vmode == V16HImode)
46329 && d->one_operand_p))
46330 return false;
46331
46332 contents[0] = 0;
46333 contents[1] = 0;
46334 for (i = 0; i < nelt2; ++i)
46335 {
46336 contents[0] |= 1u << (d->perm[i] / nelt4);
46337 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
46338 }
46339
46340 for (i = 0; i < 2; ++i)
46341 {
46342 unsigned int cnt = 0;
46343 for (j = 0; j < 4; ++j)
46344 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
46345 return false;
46346 }
46347
46348 if (d->testing_p)
46349 return true;
46350
46351 dremap = *d;
46352 dremap.vmode = V4DImode;
46353 dremap.nelt = 4;
46354 dremap.target = gen_reg_rtx (V4DImode);
46355 dremap.op0 = gen_lowpart (V4DImode, d->op0);
46356 dremap.op1 = dremap.op0;
46357 dremap.one_operand_p = true;
46358 for (i = 0; i < 2; ++i)
46359 {
46360 unsigned int cnt = 0;
46361 for (j = 0; j < 4; ++j)
46362 if ((contents[i] & (1u << j)) != 0)
46363 dremap.perm[2 * i + cnt++] = j;
46364 for (; cnt < 2; ++cnt)
46365 dremap.perm[2 * i + cnt] = 0;
46366 }
46367
46368 dfinal = *d;
46369 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
46370 dfinal.op1 = dfinal.op0;
46371 dfinal.one_operand_p = true;
46372 for (i = 0, j = 0; i < nelt; ++i)
46373 {
46374 if (i == nelt2)
46375 j = 2;
46376 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
46377 if ((d->perm[i] / nelt4) == dremap.perm[j])
46378 ;
46379 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
46380 dfinal.perm[i] |= nelt4;
46381 else
46382 gcc_unreachable ();
46383 }
46384
46385 ok = expand_vec_perm_1 (&dremap);
46386 gcc_assert (ok);
46387
46388 ok = expand_vec_perm_1 (&dfinal);
46389 gcc_assert (ok);
46390
46391 return true;
46392 }
46393
46394 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
46395 a vector permutation using two instructions, vperm2f128 resp.
46396 vperm2i128 followed by any single in-lane permutation. */
46397
46398 static bool
46399 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
46400 {
46401 struct expand_vec_perm_d dfirst, dsecond;
46402 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
46403 bool ok;
46404
46405 if (!TARGET_AVX
46406 || GET_MODE_SIZE (d->vmode) != 32
46407 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
46408 return false;
46409
46410 dsecond = *d;
46411 dsecond.one_operand_p = false;
46412 dsecond.testing_p = true;
46413
46414 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
46415 immediate. For perm < 16 the second permutation uses
46416 d->op0 as first operand, for perm >= 16 it uses d->op1
46417 as first operand. The second operand is the result of
46418 vperm2[fi]128. */
46419 for (perm = 0; perm < 32; perm++)
46420 {
46421 /* Ignore permutations which do not move anything cross-lane. */
46422 if (perm < 16)
46423 {
46424 /* The second shuffle for e.g. V4DFmode has
46425 0123 and ABCD operands.
46426 Ignore AB23, as 23 is already in the second lane
46427 of the first operand. */
46428 if ((perm & 0xc) == (1 << 2)) continue;
46429 /* And 01CD, as 01 is in the first lane of the first
46430 operand. */
46431 if ((perm & 3) == 0) continue;
46432 /* And 4567, as then the vperm2[fi]128 doesn't change
46433 anything on the original 4567 second operand. */
46434 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
46435 }
46436 else
46437 {
46438 /* The second shuffle for e.g. V4DFmode has
46439 4567 and ABCD operands.
46440 Ignore AB67, as 67 is already in the second lane
46441 of the first operand. */
46442 if ((perm & 0xc) == (3 << 2)) continue;
46443 /* And 45CD, as 45 is in the first lane of the first
46444 operand. */
46445 if ((perm & 3) == 2) continue;
46446 /* And 0123, as then the vperm2[fi]128 doesn't change
46447 anything on the original 0123 first operand. */
46448 if ((perm & 0xf) == (1 << 2)) continue;
46449 }
46450
46451 for (i = 0; i < nelt; i++)
46452 {
46453 j = d->perm[i] / nelt2;
46454 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
46455 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
46456 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
46457 dsecond.perm[i] = d->perm[i] & (nelt - 1);
46458 else
46459 break;
46460 }
46461
46462 if (i == nelt)
46463 {
46464 start_sequence ();
46465 ok = expand_vec_perm_1 (&dsecond);
46466 end_sequence ();
46467 }
46468 else
46469 ok = false;
46470
46471 if (ok)
46472 {
46473 if (d->testing_p)
46474 return true;
46475
46476 /* Found a usable second shuffle. dfirst will be
46477 vperm2f128 on d->op0 and d->op1. */
46478 dsecond.testing_p = false;
46479 dfirst = *d;
46480 dfirst.target = gen_reg_rtx (d->vmode);
46481 for (i = 0; i < nelt; i++)
46482 dfirst.perm[i] = (i & (nelt2 - 1))
46483 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
46484
46485 canonicalize_perm (&dfirst);
46486 ok = expand_vec_perm_1 (&dfirst);
46487 gcc_assert (ok);
46488
46489 /* And dsecond is some single insn shuffle, taking
46490 d->op0 and result of vperm2f128 (if perm < 16) or
46491 d->op1 and result of vperm2f128 (otherwise). */
46492 if (perm >= 16)
46493 dsecond.op0 = dsecond.op1;
46494 dsecond.op1 = dfirst.target;
46495
46496 ok = expand_vec_perm_1 (&dsecond);
46497 gcc_assert (ok);
46498
46499 return true;
46500 }
46501
46502 /* For one operand, the only useful vperm2f128 permutation is 0x01
46503 aka lanes swap. */
46504 if (d->one_operand_p)
46505 return false;
46506 }
46507
46508 return false;
46509 }
46510
46511 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46512 a two vector permutation using 2 intra-lane interleave insns
46513 and cross-lane shuffle for 32-byte vectors. */
46514
46515 static bool
46516 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
46517 {
46518 unsigned i, nelt;
46519 rtx (*gen) (rtx, rtx, rtx);
46520
46521 if (d->one_operand_p)
46522 return false;
46523 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
46524 ;
46525 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
46526 ;
46527 else
46528 return false;
46529
46530 nelt = d->nelt;
46531 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
46532 return false;
46533 for (i = 0; i < nelt; i += 2)
46534 if (d->perm[i] != d->perm[0] + i / 2
46535 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
46536 return false;
46537
46538 if (d->testing_p)
46539 return true;
46540
46541 switch (d->vmode)
46542 {
46543 case E_V32QImode:
46544 if (d->perm[0])
46545 gen = gen_vec_interleave_highv32qi;
46546 else
46547 gen = gen_vec_interleave_lowv32qi;
46548 break;
46549 case E_V16HImode:
46550 if (d->perm[0])
46551 gen = gen_vec_interleave_highv16hi;
46552 else
46553 gen = gen_vec_interleave_lowv16hi;
46554 break;
46555 case E_V8SImode:
46556 if (d->perm[0])
46557 gen = gen_vec_interleave_highv8si;
46558 else
46559 gen = gen_vec_interleave_lowv8si;
46560 break;
46561 case E_V4DImode:
46562 if (d->perm[0])
46563 gen = gen_vec_interleave_highv4di;
46564 else
46565 gen = gen_vec_interleave_lowv4di;
46566 break;
46567 case E_V8SFmode:
46568 if (d->perm[0])
46569 gen = gen_vec_interleave_highv8sf;
46570 else
46571 gen = gen_vec_interleave_lowv8sf;
46572 break;
46573 case E_V4DFmode:
46574 if (d->perm[0])
46575 gen = gen_vec_interleave_highv4df;
46576 else
46577 gen = gen_vec_interleave_lowv4df;
46578 break;
46579 default:
46580 gcc_unreachable ();
46581 }
46582
46583 emit_insn (gen (d->target, d->op0, d->op1));
46584 return true;
46585 }
46586
46587 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
46588 a single vector permutation using a single intra-lane vector
46589 permutation, vperm2f128 swapping the lanes and vblend* insn blending
46590 the non-swapped and swapped vectors together. */
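/* For example, the one-operand V4DFmode permutation {0,3,2,1} splits into
   an in-lane permutation that happens to be the identity here, a
   vperm2f128 swapping the two lanes, and a vblendpd with mask 0b1010
   picking elements 1 and 3 from the lane-swapped copy.  */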
46591
46592 static bool
46593 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
46594 {
46595 struct expand_vec_perm_d dfirst, dsecond;
46596 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
46597 rtx_insn *seq;
46598 bool ok;
46599 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
46600
46601 if (!TARGET_AVX
46602 || TARGET_AVX2
46603 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
46604 || !d->one_operand_p)
46605 return false;
46606
46607 dfirst = *d;
46608 for (i = 0; i < nelt; i++)
46609 dfirst.perm[i] = 0xff;
46610 for (i = 0, msk = 0; i < nelt; i++)
46611 {
46612 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
46613 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
46614 return false;
46615 dfirst.perm[j] = d->perm[i];
46616 if (j != i)
46617 msk |= (1 << i);
46618 }
46619 for (i = 0; i < nelt; i++)
46620 if (dfirst.perm[i] == 0xff)
46621 dfirst.perm[i] = i;
46622
46623 if (!d->testing_p)
46624 dfirst.target = gen_reg_rtx (dfirst.vmode);
46625
46626 start_sequence ();
46627 ok = expand_vec_perm_1 (&dfirst);
46628 seq = get_insns ();
46629 end_sequence ();
46630
46631 if (!ok)
46632 return false;
46633
46634 if (d->testing_p)
46635 return true;
46636
46637 emit_insn (seq);
46638
46639 dsecond = *d;
46640 dsecond.op0 = dfirst.target;
46641 dsecond.op1 = dfirst.target;
46642 dsecond.one_operand_p = true;
46643 dsecond.target = gen_reg_rtx (dsecond.vmode);
46644 for (i = 0; i < nelt; i++)
46645 dsecond.perm[i] = i ^ nelt2;
46646
46647 ok = expand_vec_perm_1 (&dsecond);
46648 gcc_assert (ok);
46649
46650 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
46651 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
46652 return true;
46653 }
46654
46655 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
46656 permutation using two vperm2f128, followed by a vshufpd insn blending
46657 the two vectors together. */
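/* For example, the V4DFmode permutation {1,6,3,4} is split into
   dfirst = {0,1,2,3} (op0 unchanged), dsecond = {6,7,4,5} (a lane swap of
   op1, done with vperm2f128) and dthird = {1,4,3,6}, the final vshufpd
   that interleaves the two intermediate results.  */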
46658
46659 static bool
46660 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
46661 {
46662 struct expand_vec_perm_d dfirst, dsecond, dthird;
46663 bool ok;
46664
46665 if (!TARGET_AVX || (d->vmode != V4DFmode))
46666 return false;
46667
46668 if (d->testing_p)
46669 return true;
46670
46671 dfirst = *d;
46672 dsecond = *d;
46673 dthird = *d;
46674
46675 dfirst.perm[0] = (d->perm[0] & ~1);
46676 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
46677 dfirst.perm[2] = (d->perm[2] & ~1);
46678 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
46679 dsecond.perm[0] = (d->perm[1] & ~1);
46680 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
46681 dsecond.perm[2] = (d->perm[3] & ~1);
46682 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
46683 dthird.perm[0] = (d->perm[0] % 2);
46684 dthird.perm[1] = (d->perm[1] % 2) + 4;
46685 dthird.perm[2] = (d->perm[2] % 2) + 2;
46686 dthird.perm[3] = (d->perm[3] % 2) + 6;
46687
46688 dfirst.target = gen_reg_rtx (dfirst.vmode);
46689 dsecond.target = gen_reg_rtx (dsecond.vmode);
46690 dthird.op0 = dfirst.target;
46691 dthird.op1 = dsecond.target;
46692 dthird.one_operand_p = false;
46693
46694 canonicalize_perm (&dfirst);
46695 canonicalize_perm (&dsecond);
46696
46697 ok = expand_vec_perm_1 (&dfirst)
46698 && expand_vec_perm_1 (&dsecond)
46699 && expand_vec_perm_1 (&dthird);
46700
46701 gcc_assert (ok);
46702
46703 return true;
46704 }
46705
46706 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
46707 permutation with two pshufb insns and an ior. We should have already
46708 failed all two instruction sequences. */
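/* For an extract-even V8HImode permutation {0,2,4,6,8,10,12,14}, the mask
   built below for op0 is the byte indexes {0,1,4,5,8,9,12,13} followed by
   eight -128 entries, the op1 mask is the mirror image, and the two
   pshufb results are combined with por.  */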
46709
46710 static bool
46711 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
46712 {
46713 rtx rperm[2][16], vperm, l, h, op, m128;
46714 unsigned int i, nelt, eltsz;
46715
46716 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
46717 return false;
46718 gcc_assert (!d->one_operand_p);
46719
46720 if (d->testing_p)
46721 return true;
46722
46723 nelt = d->nelt;
46724 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46725
46726 /* Generate two permutation masks. If the required element is within
46727 the given vector it is shuffled into the proper lane. If the required
46728 element is in the other vector, force a zero into the lane by setting
46729 bit 7 in the permutation mask. */
46730 m128 = GEN_INT (-128);
46731 for (i = 0; i < nelt; ++i)
46732 {
46733 unsigned j, e = d->perm[i];
46734 unsigned which = (e >= nelt);
46735 if (e >= nelt)
46736 e -= nelt;
46737
46738 for (j = 0; j < eltsz; ++j)
46739 {
46740 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
46741 rperm[1-which][i*eltsz + j] = m128;
46742 }
46743 }
46744
46745 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
46746 vperm = force_reg (V16QImode, vperm);
46747
46748 l = gen_reg_rtx (V16QImode);
46749 op = gen_lowpart (V16QImode, d->op0);
46750 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
46751
46752 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
46753 vperm = force_reg (V16QImode, vperm);
46754
46755 h = gen_reg_rtx (V16QImode);
46756 op = gen_lowpart (V16QImode, d->op1);
46757 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
46758
46759 op = d->target;
46760 if (d->vmode != V16QImode)
46761 op = gen_reg_rtx (V16QImode);
46762 emit_insn (gen_iorv16qi3 (op, l, h));
46763 if (op != d->target)
46764 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46765
46766 return true;
46767 }
46768
46769 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
46770 with two vpshufb insns, vpermq and vpor. We should have already failed
46771 all two or three instruction sequences. */
46772
46773 static bool
46774 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
46775 {
46776 rtx rperm[2][32], vperm, l, h, hp, op, m128;
46777 unsigned int i, nelt, eltsz;
46778
46779 if (!TARGET_AVX2
46780 || !d->one_operand_p
46781 || (d->vmode != V32QImode && d->vmode != V16HImode))
46782 return false;
46783
46784 if (d->testing_p)
46785 return true;
46786
46787 nelt = d->nelt;
46788 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46789
46790 /* Generate two permutation masks. If the required element is within
46791 the same lane, it is shuffled in. If the required element is from the
46792 other lane, force a zero by setting bit 7 in the permutation mask.
46793 The other mask has a non-negative element wherever an element is
46794 requested from the other lane, but that element is also moved to the
46795 other lane, so that the result of vpshufb can have its two V2TImode
46796 halves swapped. */
46797 m128 = GEN_INT (-128);
46798 for (i = 0; i < nelt; ++i)
46799 {
46800 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46801 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
46802
46803 for (j = 0; j < eltsz; ++j)
46804 {
46805 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
46806 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
46807 }
46808 }
46809
46810 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46811 vperm = force_reg (V32QImode, vperm);
46812
46813 h = gen_reg_rtx (V32QImode);
46814 op = gen_lowpart (V32QImode, d->op0);
46815 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46816
46817 /* Swap the 128-bit lanes of h into hp. */
46818 hp = gen_reg_rtx (V4DImode);
46819 op = gen_lowpart (V4DImode, h);
46820 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
46821 const1_rtx));
46822
46823 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
46824 vperm = force_reg (V32QImode, vperm);
46825
46826 l = gen_reg_rtx (V32QImode);
46827 op = gen_lowpart (V32QImode, d->op0);
46828 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
46829
46830 op = d->target;
46831 if (d->vmode != V32QImode)
46832 op = gen_reg_rtx (V32QImode);
46833 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
46834 if (op != d->target)
46835 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46836
46837 return true;
46838 }
46839
46840 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46841 and extract-odd permutations of two V32QImode or V16HImode operands
46842 with two vpshufb insns, vpor and vpermq. We should have already
46843 failed all two or three instruction sequences. */
46844
46845 static bool
46846 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
46847 {
46848 rtx rperm[2][32], vperm, l, h, ior, op, m128;
46849 unsigned int i, nelt, eltsz;
46850
46851 if (!TARGET_AVX2
46852 || d->one_operand_p
46853 || (d->vmode != V32QImode && d->vmode != V16HImode))
46854 return false;
46855
46856 for (i = 0; i < d->nelt; ++i)
46857 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
46858 return false;
46859
46860 if (d->testing_p)
46861 return true;
46862
46863 nelt = d->nelt;
46864 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46865
46866 /* Generate two permutation masks. In the first permutation mask
46867 the first quarter will contain indexes for the first half
46868 of op0, the second quarter will contain bit 7 set, the third quarter
46869 will contain indexes for the second half of op0 and the
46870 last quarter bit 7 set. In the second permutation mask
46871 the first quarter will contain bit 7 set, the second quarter
46872 indexes for the first half of op1, the third quarter bit 7 set
46873 and the last quarter indexes for the second half of op1.
46874 I.e. the first mask e.g. for V32QImode extract even will be:
46875 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
46876 (all values masked with 0xf except for -128) and second mask
46877 for extract even will be
46878 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
46879 m128 = GEN_INT (-128);
46880 for (i = 0; i < nelt; ++i)
46881 {
46882 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46883 unsigned which = d->perm[i] >= nelt;
46884 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
46885
46886 for (j = 0; j < eltsz; ++j)
46887 {
46888 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
46889 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
46890 }
46891 }
46892
46893 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
46894 vperm = force_reg (V32QImode, vperm);
46895
46896 l = gen_reg_rtx (V32QImode);
46897 op = gen_lowpart (V32QImode, d->op0);
46898 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
46899
46900 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46901 vperm = force_reg (V32QImode, vperm);
46902
46903 h = gen_reg_rtx (V32QImode);
46904 op = gen_lowpart (V32QImode, d->op1);
46905 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46906
46907 ior = gen_reg_rtx (V32QImode);
46908 emit_insn (gen_iorv32qi3 (ior, l, h));
46909
46910 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
46911 op = gen_reg_rtx (V4DImode);
46912 ior = gen_lowpart (V4DImode, ior);
46913 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
46914 const1_rtx, GEN_INT (3)));
46915 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46916
46917 return true;
46918 }
46919
46920 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46921 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
46922 with two "and" and "pack" or two "shift" and "pack" insns. We should
46923 have already failed all two instruction sequences. */
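/* Sketch of the V8HImode case below: viewing each operand as V4SImode, an
   extract-even permutation keeps the low 16 bits of every 32-bit element,
   so both operands are ANDed with 0xffff and then packed with packusdw;
   for extract-odd the AND is replaced by a 16-bit logical right shift.  */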
46924
46925 static bool
46926 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
46927 {
46928 rtx op, dop0, dop1, t;
46929 unsigned i, odd, c, s, nelt = d->nelt;
46930 bool end_perm = false;
46931 machine_mode half_mode;
46932 rtx (*gen_and) (rtx, rtx, rtx);
46933 rtx (*gen_pack) (rtx, rtx, rtx);
46934 rtx (*gen_shift) (rtx, rtx, rtx);
46935
46936 if (d->one_operand_p)
46937 return false;
46938
46939 switch (d->vmode)
46940 {
46941 case E_V8HImode:
46942 /* Required for "pack". */
46943 if (!TARGET_SSE4_1)
46944 return false;
46945 c = 0xffff;
46946 s = 16;
46947 half_mode = V4SImode;
46948 gen_and = gen_andv4si3;
46949 gen_pack = gen_sse4_1_packusdw;
46950 gen_shift = gen_lshrv4si3;
46951 break;
46952 case E_V16QImode:
46953 /* No check as all instructions are SSE2. */
46954 c = 0xff;
46955 s = 8;
46956 half_mode = V8HImode;
46957 gen_and = gen_andv8hi3;
46958 gen_pack = gen_sse2_packuswb;
46959 gen_shift = gen_lshrv8hi3;
46960 break;
46961 case E_V16HImode:
46962 if (!TARGET_AVX2)
46963 return false;
46964 c = 0xffff;
46965 s = 16;
46966 half_mode = V8SImode;
46967 gen_and = gen_andv8si3;
46968 gen_pack = gen_avx2_packusdw;
46969 gen_shift = gen_lshrv8si3;
46970 end_perm = true;
46971 break;
46972 case E_V32QImode:
46973 if (!TARGET_AVX2)
46974 return false;
46975 c = 0xff;
46976 s = 8;
46977 half_mode = V16HImode;
46978 gen_and = gen_andv16hi3;
46979 gen_pack = gen_avx2_packuswb;
46980 gen_shift = gen_lshrv16hi3;
46981 end_perm = true;
46982 break;
46983 default:
46984 /* Only for V8HI, V16QI, V16HI and V32QI modes is this approach more
46985 profitable than general shuffles. */
46986 return false;
46987 }
46988
46989 /* Check that permutation is even or odd. */
46990 odd = d->perm[0];
46991 if (odd > 1)
46992 return false;
46993
46994 for (i = 1; i < nelt; ++i)
46995 if (d->perm[i] != 2 * i + odd)
46996 return false;
46997
46998 if (d->testing_p)
46999 return true;
47000
47001 dop0 = gen_reg_rtx (half_mode);
47002 dop1 = gen_reg_rtx (half_mode);
47003 if (odd == 0)
47004 {
47005 t = gen_const_vec_duplicate (half_mode, GEN_INT (c));
47006 t = force_reg (half_mode, t);
47007 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
47008 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
47009 }
47010 else
47011 {
47012 emit_insn (gen_shift (dop0,
47013 gen_lowpart (half_mode, d->op0),
47014 GEN_INT (s)));
47015 emit_insn (gen_shift (dop1,
47016 gen_lowpart (half_mode, d->op1),
47017 GEN_INT (s)));
47018 }
47019 /* In the AVX2 256-bit case we need to permute the pack result. */
47020 if (TARGET_AVX2 && end_perm)
47021 {
47022 op = gen_reg_rtx (d->vmode);
47023 t = gen_reg_rtx (V4DImode);
47024 emit_insn (gen_pack (op, dop0, dop1));
47025 emit_insn (gen_avx2_permv4di_1 (t,
47026 gen_lowpart (V4DImode, op),
47027 const0_rtx,
47028 const2_rtx,
47029 const1_rtx,
47030 GEN_INT (3)));
47031 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
47032 }
47033 else
47034 emit_insn (gen_pack (d->target, dop0, dop1));
47035
47036 return true;
47037 }
47038
47039 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
47040 and extract-odd permutations of two V64QI operands
47041 with two "shift", two "trunc" and one "concat" insns for "odd"
47042 and two "trunc" and one "concat" insn for "even".
47043 We should have already failed all two instruction sequences. */
47044
47045 static bool
47046 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
47047 {
47048 rtx t1, t2, t3, t4;
47049 unsigned i, odd, nelt = d->nelt;
47050
47051 if (!TARGET_AVX512BW
47052 || d->one_operand_p
47053 || d->vmode != V64QImode)
47054 return false;
47055
47056 /* Check that permutation is even or odd. */
47057 odd = d->perm[0];
47058 if (odd > 1)
47059 return false;
47060
47061 for (i = 1; i < nelt; ++i)
47062 if (d->perm[i] != 2 * i + odd)
47063 return false;
47064
47065 if (d->testing_p)
47066 return true;
47067
47068
47069 if (odd)
47070 {
47071 t1 = gen_reg_rtx (V32HImode);
47072 t2 = gen_reg_rtx (V32HImode);
47073 emit_insn (gen_lshrv32hi3 (t1,
47074 gen_lowpart (V32HImode, d->op0),
47075 GEN_INT (8)));
47076 emit_insn (gen_lshrv32hi3 (t2,
47077 gen_lowpart (V32HImode, d->op1),
47078 GEN_INT (8)));
47079 }
47080 else
47081 {
47082 t1 = gen_lowpart (V32HImode, d->op0);
47083 t2 = gen_lowpart (V32HImode, d->op1);
47084 }
47085
47086 t3 = gen_reg_rtx (V32QImode);
47087 t4 = gen_reg_rtx (V32QImode);
47088 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
47089 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
47090 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
47091
47092 return true;
47093 }
47094
47095 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
47096 and extract-odd permutations. */
47097
47098 static bool
47099 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
47100 {
47101 rtx t1, t2, t3, t4, t5;
47102
47103 switch (d->vmode)
47104 {
47105 case E_V4DFmode:
47106 if (d->testing_p)
47107 break;
47108 t1 = gen_reg_rtx (V4DFmode);
47109 t2 = gen_reg_rtx (V4DFmode);
47110
47111 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
47112 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
47113 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
47114
47115 /* Now an unpck[lh]pd will produce the result required. */
47116 if (odd)
47117 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
47118 else
47119 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
47120 emit_insn (t3);
47121 break;
47122
47123 case E_V8SFmode:
47124 {
47125 int mask = odd ? 0xdd : 0x88;
47126
47127 if (d->testing_p)
47128 break;
47129 t1 = gen_reg_rtx (V8SFmode);
47130 t2 = gen_reg_rtx (V8SFmode);
47131 t3 = gen_reg_rtx (V8SFmode);
47132
47133 /* Shuffle within the 128-bit lanes to produce:
47134 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
47135 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
47136 GEN_INT (mask)));
47137
47138 /* Shuffle the lanes around to produce:
47139 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
47140 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
47141 GEN_INT (0x3)));
47142
47143 /* Shuffle within the 128-bit lanes to produce:
47144 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
47145 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
47146
47147 /* Shuffle within the 128-bit lanes to produce:
47148 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
47149 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
47150
47151 /* Shuffle the lanes around to produce:
47152 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
47153 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
47154 GEN_INT (0x20)));
47155 }
47156 break;
47157
47158 case E_V2DFmode:
47159 case E_V4SFmode:
47160 case E_V2DImode:
47161 case E_V4SImode:
47162 /* These are always directly implementable by expand_vec_perm_1. */
47163 gcc_unreachable ();
47164
47165 case E_V8HImode:
47166 if (TARGET_SSE4_1)
47167 return expand_vec_perm_even_odd_pack (d);
47168 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
47169 return expand_vec_perm_pshufb2 (d);
47170 else
47171 {
47172 if (d->testing_p)
47173 break;
47174 /* We need 2*log2(N)-1 operations to achieve odd/even
47175 with interleave. */
47176 t1 = gen_reg_rtx (V8HImode);
47177 t2 = gen_reg_rtx (V8HImode);
47178 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
47179 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
47180 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
47181 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
47182 if (odd)
47183 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
47184 else
47185 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
47186 emit_insn (t3);
47187 }
47188 break;
47189
47190 case E_V16QImode:
47191 return expand_vec_perm_even_odd_pack (d);
47192
47193 case E_V16HImode:
47194 case E_V32QImode:
47195 return expand_vec_perm_even_odd_pack (d);
47196
47197 case E_V64QImode:
47198 return expand_vec_perm_even_odd_trunc (d);
47199
47200 case E_V4DImode:
47201 if (!TARGET_AVX2)
47202 {
47203 struct expand_vec_perm_d d_copy = *d;
47204 d_copy.vmode = V4DFmode;
47205 if (d->testing_p)
47206 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
47207 else
47208 d_copy.target = gen_reg_rtx (V4DFmode);
47209 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
47210 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
47211 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
47212 {
47213 if (!d->testing_p)
47214 emit_move_insn (d->target,
47215 gen_lowpart (V4DImode, d_copy.target));
47216 return true;
47217 }
47218 return false;
47219 }
47220
47221 if (d->testing_p)
47222 break;
47223
47224 t1 = gen_reg_rtx (V4DImode);
47225 t2 = gen_reg_rtx (V4DImode);
47226
47227 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
47228 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
47229 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
47230
47231 /* Now a vpunpck[lh]qdq will produce the result required. */
47232 if (odd)
47233 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
47234 else
47235 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
47236 emit_insn (t3);
47237 break;
47238
47239 case E_V8SImode:
47240 if (!TARGET_AVX2)
47241 {
47242 struct expand_vec_perm_d d_copy = *d;
47243 d_copy.vmode = V8SFmode;
47244 if (d->testing_p)
47245 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
47246 else
47247 d_copy.target = gen_reg_rtx (V8SFmode);
47248 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
47249 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
47250 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
47251 {
47252 if (!d->testing_p)
47253 emit_move_insn (d->target,
47254 gen_lowpart (V8SImode, d_copy.target));
47255 return true;
47256 }
47257 return false;
47258 }
47259
47260 if (d->testing_p)
47261 break;
47262
47263 t1 = gen_reg_rtx (V8SImode);
47264 t2 = gen_reg_rtx (V8SImode);
47265 t3 = gen_reg_rtx (V4DImode);
47266 t4 = gen_reg_rtx (V4DImode);
47267 t5 = gen_reg_rtx (V4DImode);
47268
47269 /* Shuffle the lanes around into
47270 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
47271 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
47272 gen_lowpart (V4DImode, d->op1),
47273 GEN_INT (0x20)));
47274 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
47275 gen_lowpart (V4DImode, d->op1),
47276 GEN_INT (0x31)));
47277
47278 /* Swap the 2nd and 3rd position in each lane into
47279 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
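      /* The pshufd immediate 2*4 + 1*16 + 3*64 (0xd8) encodes, two bits per
	 destination element, the per-lane element selection { 0 2 1 3 }.  */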
47280 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
47281 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
47282 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
47283 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
47284
47285 /* Now a vpunpck[lh]qdq will produce
47286 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
47287 if (odd)
47288 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
47289 gen_lowpart (V4DImode, t2));
47290 else
47291 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
47292 gen_lowpart (V4DImode, t2));
47293 emit_insn (t3);
47294 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
47295 break;
47296
47297 default:
47298 gcc_unreachable ();
47299 }
47300
47301 return true;
47302 }
47303
47304 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
47305 extract-even and extract-odd permutations. */
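/* For example, with nelt == 4 an even extraction uses the selector
   { 0 2 4 6 } and an odd extraction uses { 1 3 5 7 }.  */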
47306
47307 static bool
47308 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
47309 {
47310 unsigned i, odd, nelt = d->nelt;
47311
47312 odd = d->perm[0];
47313 if (odd != 0 && odd != 1)
47314 return false;
47315
47316 for (i = 1; i < nelt; ++i)
47317 if (d->perm[i] != 2 * i + odd)
47318 return false;
47319
47320 return expand_vec_perm_even_odd_1 (d, odd);
47321 }
47322
47323 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
47324 permutations. We assume that expand_vec_perm_1 has already failed. */
47325
47326 static bool
47327 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
47328 {
47329 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
47330 machine_mode vmode = d->vmode;
47331 unsigned char perm2[4];
47332 rtx op0 = d->op0, dest;
47333 bool ok;
47334
47335 switch (vmode)
47336 {
47337 case E_V4DFmode:
47338 case E_V8SFmode:
47339 /* These are special-cased in sse.md so that we can optionally
47340 use the vbroadcast instruction. They expand to two insns
47341 if the input happens to be in a register. */
47342 gcc_unreachable ();
47343
47344 case E_V2DFmode:
47345 case E_V2DImode:
47346 case E_V4SFmode:
47347 case E_V4SImode:
47348 /* These are always implementable using standard shuffle patterns. */
47349 gcc_unreachable ();
47350
47351 case E_V8HImode:
47352 case E_V16QImode:
47353 /* These can be implemented via interleave. We save one insn by
47354 stopping once we have promoted to V4SImode and then use pshufd. */
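      /* For example, broadcasting element 5 of a V8HImode vector takes one
	 interleave-high, giving { 4 4 5 5 6 6 7 7 }, followed by a pshufd
	 that replicates the second V4SImode element (the { 5 5 } pair).  */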
47355 if (d->testing_p)
47356 return true;
47357 do
47358 {
47359 rtx dest;
47360 rtx (*gen) (rtx, rtx, rtx)
47361 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
47362 : gen_vec_interleave_lowv8hi;
47363
47364 if (elt >= nelt2)
47365 {
47366 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
47367 : gen_vec_interleave_highv8hi;
47368 elt -= nelt2;
47369 }
47370 nelt2 /= 2;
47371
47372 dest = gen_reg_rtx (vmode);
47373 emit_insn (gen (dest, op0, op0));
47374 vmode = get_mode_wider_vector (vmode);
47375 op0 = gen_lowpart (vmode, dest);
47376 }
47377 while (vmode != V4SImode);
47378
47379 memset (perm2, elt, 4);
47380 dest = gen_reg_rtx (V4SImode);
47381 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
47382 gcc_assert (ok);
47383 if (!d->testing_p)
47384 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
47385 return true;
47386
47387 case E_V64QImode:
47388 case E_V32QImode:
47389 case E_V16HImode:
47390 case E_V8SImode:
47391 case E_V4DImode:
47392 /* For AVX2 broadcasts of the first element vpbroadcast* or
47393 vpermq should be used by expand_vec_perm_1. */
47394 gcc_assert (!TARGET_AVX2 || d->perm[0]);
47395 return false;
47396
47397 default:
47398 gcc_unreachable ();
47399 }
47400 }
47401
47402 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
47403 broadcast permutations. */
47404
47405 static bool
47406 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
47407 {
47408 unsigned i, elt, nelt = d->nelt;
47409
47410 if (!d->one_operand_p)
47411 return false;
47412
47413 elt = d->perm[0];
47414 for (i = 1; i < nelt; ++i)
47415 if (d->perm[i] != elt)
47416 return false;
47417
47418 return expand_vec_perm_broadcast_1 (d);
47419 }
47420
47421 /* Implement arbitrary permutations of two V64QImode operands
47422 with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */
47423 static bool
47424 expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
47425 {
47426 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
47427 return false;
47428
47429 if (d->testing_p)
47430 return true;
47431
47432 struct expand_vec_perm_d ds[2];
47433 rtx rperm[128], vperm, target0, target1;
47434 unsigned int i, nelt;
47435 machine_mode vmode;
47436
47437 nelt = d->nelt;
47438 vmode = V64QImode;
47439
47440 for (i = 0; i < 2; i++)
47441 {
47442 ds[i] = *d;
47443 ds[i].vmode = V32HImode;
47444 ds[i].nelt = 32;
47445 ds[i].target = gen_reg_rtx (V32HImode);
47446 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
47447 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
47448 }
47449
47450 /* Prepare permutations such that the first one takes care of
47451 putting the even bytes into the right positions or one position
47452 higher (ds[0]) and the second one takes care of
47453 putting the odd bytes into the right positions or one position
47454 lower (ds[1]). */
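  /* For example, if d->perm[0] == 5, ds[0].perm[0] becomes word 2 and the
     first vpshufb mask entry rperm[0] becomes 1, so byte 1 (the high byte)
     of that word, i.e. source byte 5, ends up in destination byte 0.  */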
47455
47456 for (i = 0; i < nelt; i++)
47457 {
47458 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
47459 if (i & 1)
47460 {
47461 rperm[i] = constm1_rtx;
47462 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
47463 }
47464 else
47465 {
47466 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
47467 rperm[i + 64] = constm1_rtx;
47468 }
47469 }
47470
47471 bool ok = expand_vec_perm_1 (&ds[0]);
47472 gcc_assert (ok);
47473 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
47474
47475 ok = expand_vec_perm_1 (&ds[1]);
47476 gcc_assert (ok);
47477 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
47478
47479 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
47480 vperm = force_reg (vmode, vperm);
47481 target0 = gen_reg_rtx (V64QImode);
47482 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
47483
47484 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
47485 vperm = force_reg (vmode, vperm);
47486 target1 = gen_reg_rtx (V64QImode);
47487 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
47488
47489 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
47490 return true;
47491 }
47492
47493 /* Implement arbitrary permutation of two V32QImode and V16HImode operands
47494 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
47495 all the shorter instruction sequences. */
47496
47497 static bool
47498 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
47499 {
47500 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
47501 unsigned int i, nelt, eltsz;
47502 bool used[4];
47503
47504 if (!TARGET_AVX2
47505 || d->one_operand_p
47506 || (d->vmode != V32QImode && d->vmode != V16HImode))
47507 return false;
47508
47509 if (d->testing_p)
47510 return true;
47511
47512 nelt = d->nelt;
47513 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
47514
47515 /* Generate 4 permutation masks. If the required element is within
47516 the same lane, it is shuffled in. If the required element is from the
47517 other lane, force a zero by setting bit 7 in the permutation mask.
47518 In the other mask, the mask has non-negative elements for elements
47519 requested from the other lane, but those are also moved to the other
47520 lane, so that the result of vpshufb can have its two V2TImode halves
47521 swapped. */
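  /* Mask 0 covers op0 elements that stay within their own 128-bit lane,
     mask 1 op0 elements that cross lanes, mask 2 op1 elements within their
     own lane and mask 3 op1 elements that cross lanes.  */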
47522 m128 = GEN_INT (-128);
47523 for (i = 0; i < 32; ++i)
47524 {
47525 rperm[0][i] = m128;
47526 rperm[1][i] = m128;
47527 rperm[2][i] = m128;
47528 rperm[3][i] = m128;
47529 }
47530 used[0] = false;
47531 used[1] = false;
47532 used[2] = false;
47533 used[3] = false;
47534 for (i = 0; i < nelt; ++i)
47535 {
47536 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
47537 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
47538 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
47539
47540 for (j = 0; j < eltsz; ++j)
47541 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
47542 used[which] = true;
47543 }
47544
47545 for (i = 0; i < 2; ++i)
47546 {
47547 if (!used[2 * i + 1])
47548 {
47549 h[i] = NULL_RTX;
47550 continue;
47551 }
47552 vperm = gen_rtx_CONST_VECTOR (V32QImode,
47553 gen_rtvec_v (32, rperm[2 * i + 1]));
47554 vperm = force_reg (V32QImode, vperm);
47555 h[i] = gen_reg_rtx (V32QImode);
47556 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47557 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
47558 }
47559
47560 /* Swap the 128-bit lanes of h[X]. */
47561 for (i = 0; i < 2; ++i)
47562 {
47563 if (h[i] == NULL_RTX)
47564 continue;
47565 op = gen_reg_rtx (V4DImode);
47566 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
47567 const2_rtx, GEN_INT (3), const0_rtx,
47568 const1_rtx));
47569 h[i] = gen_lowpart (V32QImode, op);
47570 }
47571
47572 for (i = 0; i < 2; ++i)
47573 {
47574 if (!used[2 * i])
47575 {
47576 l[i] = NULL_RTX;
47577 continue;
47578 }
47579 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
47580 vperm = force_reg (V32QImode, vperm);
47581 l[i] = gen_reg_rtx (V32QImode);
47582 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47583 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
47584 }
47585
47586 for (i = 0; i < 2; ++i)
47587 {
47588 if (h[i] && l[i])
47589 {
47590 op = gen_reg_rtx (V32QImode);
47591 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
47592 l[i] = op;
47593 }
47594 else if (h[i])
47595 l[i] = h[i];
47596 }
47597
47598 gcc_assert (l[0] && l[1]);
47599 op = d->target;
47600 if (d->vmode != V32QImode)
47601 op = gen_reg_rtx (V32QImode);
47602 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
47603 if (op != d->target)
47604 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47605 return true;
47606 }
47607
47608 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
47609 With all of the interface bits taken care of, perform the expansion
47610 in D and return true on success. */
47611
47612 static bool
47613 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
47614 {
47615 /* Try a single instruction expansion. */
47616 if (expand_vec_perm_1 (d))
47617 return true;
47618
47619 /* Try sequences of two instructions. */
47620
47621 if (expand_vec_perm_pshuflw_pshufhw (d))
47622 return true;
47623
47624 if (expand_vec_perm_palignr (d, false))
47625 return true;
47626
47627 if (expand_vec_perm_interleave2 (d))
47628 return true;
47629
47630 if (expand_vec_perm_broadcast (d))
47631 return true;
47632
47633 if (expand_vec_perm_vpermq_perm_1 (d))
47634 return true;
47635
47636 if (expand_vec_perm_vperm2f128 (d))
47637 return true;
47638
47639 if (expand_vec_perm_pblendv (d))
47640 return true;
47641
47642 /* Try sequences of three instructions. */
47643
47644 if (expand_vec_perm_even_odd_pack (d))
47645 return true;
47646
47647 if (expand_vec_perm_2vperm2f128_vshuf (d))
47648 return true;
47649
47650 if (expand_vec_perm_pshufb2 (d))
47651 return true;
47652
47653 if (expand_vec_perm_interleave3 (d))
47654 return true;
47655
47656 if (expand_vec_perm_vperm2f128_vblend (d))
47657 return true;
47658
47659 /* Try sequences of four instructions. */
47660
47661 if (expand_vec_perm_even_odd_trunc (d))
47662 return true;
47663 if (expand_vec_perm_vpshufb2_vpermq (d))
47664 return true;
47665
47666 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
47667 return true;
47668
47669 if (expand_vec_perm_vpermt2_vpshub2 (d))
47670 return true;
47671
47672 /* ??? Look for narrow permutations whose element orderings would
47673 allow the promotion to a wider mode. */
47674
47675 /* ??? Look for sequences of interleave or a wider permute that place
47676 the data into the correct lanes for a half-vector shuffle like
47677 pshuf[lh]w or vpermilps. */
47678
47679 /* ??? Look for sequences of interleave that produce the desired results.
47680 The combinatorics of punpck[lh] get pretty ugly... */
47681
47682 if (expand_vec_perm_even_odd (d))
47683 return true;
47684
47685 /* Even longer sequences. */
47686 if (expand_vec_perm_vpshufb4_vpermq2 (d))
47687 return true;
47688
47689 /* See if we can get the same permutation in different vector integer
47690 mode. */
47691 struct expand_vec_perm_d nd;
47692 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
47693 {
47694 if (!d->testing_p)
47695 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
47696 return true;
47697 }
47698
47699 return false;
47700 }
47701
47702 /* If a permutation only uses one operand, canonicalize it to make that
47703 explicit. Returns true if the permutation references both operands. */
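/* For example, with nelt == 4 the selector { 4 5 6 7 } only references the
   second operand; it is folded to { 0 1 2 3 } with op0 = op1, and the
   function returns false.  */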
47704
47705 static bool
47706 canonicalize_perm (struct expand_vec_perm_d *d)
47707 {
47708 int i, which, nelt = d->nelt;
47709
47710 for (i = which = 0; i < nelt; ++i)
47711 which |= (d->perm[i] < nelt ? 1 : 2);
47712
47713 d->one_operand_p = true;
47714 switch (which)
47715 {
47716 default:
47717 gcc_unreachable();
47718
47719 case 3:
47720 if (!rtx_equal_p (d->op0, d->op1))
47721 {
47722 d->one_operand_p = false;
47723 break;
47724 }
47725 /* The elements of PERM do not suggest that only the first operand
47726 is used, but both operands are identical. Allow easier matching
47727 of the permutation by folding the permutation into the single
47728 input vector. */
47729 /* FALLTHRU */
47730
47731 case 2:
47732 for (i = 0; i < nelt; ++i)
47733 d->perm[i] &= nelt - 1;
47734 d->op0 = d->op1;
47735 break;
47736
47737 case 1:
47738 d->op1 = d->op0;
47739 break;
47740 }
47741
47742 return (which == 3);
47743 }
47744
47745 bool
47746 ix86_expand_vec_perm_const (rtx operands[4])
47747 {
47748 struct expand_vec_perm_d d;
47749 unsigned char perm[MAX_VECT_LEN];
47750 int i, nelt;
47751 bool two_args;
47752 rtx sel;
47753
47754 d.target = operands[0];
47755 d.op0 = operands[1];
47756 d.op1 = operands[2];
47757 sel = operands[3];
47758
47759 d.vmode = GET_MODE (d.target);
47760 gcc_assert (VECTOR_MODE_P (d.vmode));
47761 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47762 d.testing_p = false;
47763
47764 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
47765 gcc_assert (XVECLEN (sel, 0) == nelt);
47766 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
47767
47768 for (i = 0; i < nelt; ++i)
47769 {
47770 rtx e = XVECEXP (sel, 0, i);
47771 int ei = INTVAL (e) & (2 * nelt - 1);
47772 d.perm[i] = ei;
47773 perm[i] = ei;
47774 }
47775
47776 two_args = canonicalize_perm (&d);
47777
47778 if (ix86_expand_vec_perm_const_1 (&d))
47779 return true;
47780
47781 /* If the selector says both arguments are needed, but the operands are the
47782 same, the above tried to expand with one_operand_p and flattened selector.
47783 If that didn't work, retry without one_operand_p; we succeeded with that
47784 during testing. */
47785 if (two_args && d.one_operand_p)
47786 {
47787 d.one_operand_p = false;
47788 memcpy (d.perm, perm, sizeof (perm));
47789 return ix86_expand_vec_perm_const_1 (&d);
47790 }
47791
47792 return false;
47793 }
47794
47795 /* Implement targetm.vectorize.vec_perm_const_ok. */
47796
47797 static bool
47798 ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
47799 {
47800 struct expand_vec_perm_d d;
47801 unsigned int i, nelt, which;
47802 bool ret;
47803
47804 d.vmode = vmode;
47805 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47806 d.testing_p = true;
47807
47808 /* Given sufficient ISA support we can just return true here
47809 for selected vector modes. */
47810 switch (d.vmode)
47811 {
47812 case E_V16SFmode:
47813 case E_V16SImode:
47814 case E_V8DImode:
47815 case E_V8DFmode:
47816 if (TARGET_AVX512F)
47817 /* All implementable with a single vperm[it]2 insn. */
47818 return true;
47819 break;
47820 case E_V32HImode:
47821 if (TARGET_AVX512BW)
47822 /* All implementable with a single vperm[it]2 insn. */
47823 return true;
47824 break;
47825 case E_V64QImode:
47826 if (TARGET_AVX512BW)
47827 /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */
47828 return true;
47829 break;
47830 case E_V8SImode:
47831 case E_V8SFmode:
47832 case E_V4DFmode:
47833 case E_V4DImode:
47834 if (TARGET_AVX512VL)
47835 /* All implementable with a single vperm[it]2 insn. */
47836 return true;
47837 break;
47838 case E_V16HImode:
47839 if (TARGET_AVX2)
47840 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47841 return true;
47842 break;
47843 case E_V32QImode:
47844 if (TARGET_AVX2)
47845 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47846 return true;
47847 break;
47848 case E_V4SImode:
47849 case E_V4SFmode:
47850 case E_V8HImode:
47851 case E_V16QImode:
47852 /* All implementable with a single vpperm insn. */
47853 if (TARGET_XOP)
47854 return true;
47855 /* All implementable with 2 pshufb + 1 ior. */
47856 if (TARGET_SSSE3)
47857 return true;
47858 break;
47859 case E_V2DImode:
47860 case E_V2DFmode:
47861 /* All implementable with shufpd or unpck[lh]pd. */
47862 return true;
47863 default:
47864 return false;
47865 }
47866
47867 /* Extract the values from the vector CST into the permutation
47868 array in D. */
47869 for (i = which = 0; i < nelt; ++i)
47870 {
47871 unsigned char e = sel[i];
47872 gcc_assert (e < 2 * nelt);
47873 d.perm[i] = e;
47874 which |= (e < nelt ? 1 : 2);
47875 }
47876
47877 /* For all elements from second vector, fold the elements to first. */
47878 if (which == 2)
47879 for (i = 0; i < nelt; ++i)
47880 d.perm[i] -= nelt;
47881
47882 /* Check whether the mask can be applied to the vector type. */
47883 d.one_operand_p = (which != 3);
47884
47885 /* Implementable with shufps or pshufd. */
47886 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
47887 return true;
47888
47889 /* Otherwise we have to go through the motions and see if we can
47890 figure out how to generate the requested permutation. */
47891 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
47892 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
47893 if (!d.one_operand_p)
47894 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
47895
47896 start_sequence ();
47897 ret = ix86_expand_vec_perm_const_1 (&d);
47898 end_sequence ();
47899
47900 return ret;
47901 }
47902
47903 void
47904 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
47905 {
47906 struct expand_vec_perm_d d;
47907 unsigned i, nelt;
47908
47909 d.target = targ;
47910 d.op0 = op0;
47911 d.op1 = op1;
47912 d.vmode = GET_MODE (targ);
47913 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47914 d.one_operand_p = false;
47915 d.testing_p = false;
47916
47917 for (i = 0; i < nelt; ++i)
47918 d.perm[i] = i * 2 + odd;
47919
47920 /* We'll either be able to implement the permutation directly... */
47921 if (expand_vec_perm_1 (&d))
47922 return;
47923
47924 /* ... or we use the special-case patterns. */
47925 expand_vec_perm_even_odd_1 (&d, odd);
47926 }
47927
47928 static void
47929 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
47930 {
47931 struct expand_vec_perm_d d;
47932 unsigned i, nelt, base;
47933 bool ok;
47934
47935 d.target = targ;
47936 d.op0 = op0;
47937 d.op1 = op1;
47938 d.vmode = GET_MODE (targ);
47939 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47940 d.one_operand_p = false;
47941 d.testing_p = false;
47942
47943 base = high_p ? nelt / 2 : 0;
47944 for (i = 0; i < nelt / 2; ++i)
47945 {
47946 d.perm[i * 2] = i + base;
47947 d.perm[i * 2 + 1] = i + base + nelt;
47948 }
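  /* For V4SImode this builds the selector { 0 4 1 5 } (low) or
     { 2 6 3 7 } (high).  */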
47949
47950 /* Note that for AVX this isn't one instruction. */
47951 ok = ix86_expand_vec_perm_const_1 (&d);
47952 gcc_assert (ok);
47953 }
47954
47955
47956 /* Expand a vector operation CODE for a V*QImode in terms of the
47957 same operation on V*HImode. */
47958
47959 void
47960 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
47961 {
47962 machine_mode qimode = GET_MODE (dest);
47963 machine_mode himode;
47964 rtx (*gen_il) (rtx, rtx, rtx);
47965 rtx (*gen_ih) (rtx, rtx, rtx);
47966 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
47967 struct expand_vec_perm_d d;
47968 bool ok, full_interleave;
47969 bool uns_p = false;
47970 int i;
47971
47972 switch (qimode)
47973 {
47974 case E_V16QImode:
47975 himode = V8HImode;
47976 gen_il = gen_vec_interleave_lowv16qi;
47977 gen_ih = gen_vec_interleave_highv16qi;
47978 break;
47979 case E_V32QImode:
47980 himode = V16HImode;
47981 gen_il = gen_avx2_interleave_lowv32qi;
47982 gen_ih = gen_avx2_interleave_highv32qi;
47983 break;
47984 case E_V64QImode:
47985 himode = V32HImode;
47986 gen_il = gen_avx512bw_interleave_lowv64qi;
47987 gen_ih = gen_avx512bw_interleave_highv64qi;
47988 break;
47989 default:
47990 gcc_unreachable ();
47991 }
47992
47993 op2_l = op2_h = op2;
47994 switch (code)
47995 {
47996 case MULT:
47997 /* Unpack data such that we've got a source byte in each low byte of
47998 each word. We don't care what goes into the high byte of each word.
47999 Rather than trying to get zero in there, it is most convenient to let
48000 it be a copy of the low byte. */
48001 op2_l = gen_reg_rtx (qimode);
48002 op2_h = gen_reg_rtx (qimode);
48003 emit_insn (gen_il (op2_l, op2, op2));
48004 emit_insn (gen_ih (op2_h, op2, op2));
48005
48006 op1_l = gen_reg_rtx (qimode);
48007 op1_h = gen_reg_rtx (qimode);
48008 emit_insn (gen_il (op1_l, op1, op1));
48009 emit_insn (gen_ih (op1_h, op1, op1));
48010 full_interleave = qimode == V16QImode;
48011 break;
48012
48013 case ASHIFT:
48014 case LSHIFTRT:
48015 uns_p = true;
48016 /* FALLTHRU */
48017 case ASHIFTRT:
48018 op1_l = gen_reg_rtx (himode);
48019 op1_h = gen_reg_rtx (himode);
48020 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
48021 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
48022 full_interleave = true;
48023 break;
48024 default:
48025 gcc_unreachable ();
48026 }
48027
48028 /* Perform the operation. */
48029 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
48030 1, OPTAB_DIRECT);
48031 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
48032 1, OPTAB_DIRECT);
48033 gcc_assert (res_l && res_h);
48034
48035 /* Merge the data back into the right place. */
48036 d.target = dest;
48037 d.op0 = gen_lowpart (qimode, res_l);
48038 d.op1 = gen_lowpart (qimode, res_h);
48039 d.vmode = qimode;
48040 d.nelt = GET_MODE_NUNITS (qimode);
48041 d.one_operand_p = false;
48042 d.testing_p = false;
48043
48044 if (full_interleave)
48045 {
48046 /* For SSE2, we used a full interleave, so the desired
48047 results are in the even elements. */
48048 for (i = 0; i < d.nelt; ++i)
48049 d.perm[i] = i * 2;
48050 }
48051 else
48052 {
48053 /* For AVX, the interleave used above was not cross-lane. So the
48054 extraction picks the even elements, but with the second and third quarters swapped.
48055 Happily, that is even one insn shorter than even extraction.
48056 For AVX512BW we have 4 lanes. We extract evens from within a lane,
48057 always first from the first and then from the second source operand,
48058 the index bits above the low 4 bits remain the same.
48059 Thus, for d.nelt == 32 we want permutation
48060 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
48061 and for d.nelt == 64 we want permutation
48062 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
48063 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
48064 for (i = 0; i < d.nelt; ++i)
48065 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
48066 }
48067
48068 ok = ix86_expand_vec_perm_const_1 (&d);
48069 gcc_assert (ok);
48070
48071 set_unique_reg_note (get_last_insn (), REG_EQUAL,
48072 gen_rtx_fmt_ee (code, qimode, op1, op2));
48073 }
48074
48075 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
48076 if op is CONST_VECTOR with all odd elements equal to their
48077 preceding element. */
48078
48079 static bool
48080 const_vector_equal_evenodd_p (rtx op)
48081 {
48082 machine_mode mode = GET_MODE (op);
48083 int i, nunits = GET_MODE_NUNITS (mode);
48084 if (GET_CODE (op) != CONST_VECTOR
48085 || nunits != CONST_VECTOR_NUNITS (op))
48086 return false;
48087 for (i = 0; i < nunits; i += 2)
48088 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
48089 return false;
48090 return true;
48091 }
48092
48093 void
48094 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
48095 bool uns_p, bool odd_p)
48096 {
48097 machine_mode mode = GET_MODE (op1);
48098 machine_mode wmode = GET_MODE (dest);
48099 rtx x;
48100 rtx orig_op1 = op1, orig_op2 = op2;
48101
48102 if (!nonimmediate_operand (op1, mode))
48103 op1 = force_reg (mode, op1);
48104 if (!nonimmediate_operand (op2, mode))
48105 op2 = force_reg (mode, op2);
48106
48107 /* We only play even/odd games with vectors of SImode. */
48108 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
48109
48110 /* If we're looking for the odd results, shift those members down to
48111 the even slots. For some cpus this is faster than a PSHUFD. */
48112 if (odd_p)
48113 {
48114 /* For XOP use vpmacsdqh, but only for smult, as it is only
48115 signed. */
48116 if (TARGET_XOP && mode == V4SImode && !uns_p)
48117 {
48118 x = force_reg (wmode, CONST0_RTX (wmode));
48119 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
48120 return;
48121 }
48122
48123 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
48124 if (!const_vector_equal_evenodd_p (orig_op1))
48125 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
48126 x, NULL, 1, OPTAB_DIRECT);
48127 if (!const_vector_equal_evenodd_p (orig_op2))
48128 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
48129 x, NULL, 1, OPTAB_DIRECT);
48130 op1 = gen_lowpart (mode, op1);
48131 op2 = gen_lowpart (mode, op2);
48132 }
48133
48134 if (mode == V16SImode)
48135 {
48136 if (uns_p)
48137 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
48138 else
48139 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
48140 }
48141 else if (mode == V8SImode)
48142 {
48143 if (uns_p)
48144 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
48145 else
48146 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
48147 }
48148 else if (uns_p)
48149 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
48150 else if (TARGET_SSE4_1)
48151 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
48152 else
48153 {
48154 rtx s1, s2, t0, t1, t2;
48155
48156 /* The easiest way to implement this without PMULDQ is to go through
48157 the motions as if we are performing a full 64-bit multiply, with
48158 the exception that we need to do less shuffling of the elements. */
48159
48160 /* Compute the sign-extension, aka highparts, of the two operands. */
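      /* s1 and s2 are all-ones in the lanes where op1 resp. op2 is negative
	 (0 > op), which is exactly the high 32 bits of the sign-extended
	 64-bit operand.  */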
48161 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
48162 op1, pc_rtx, pc_rtx);
48163 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
48164 op2, pc_rtx, pc_rtx);
48165
48166 /* Multiply LO(A) * HI(B), and vice-versa. */
48167 t1 = gen_reg_rtx (wmode);
48168 t2 = gen_reg_rtx (wmode);
48169 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
48170 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
48171
48172 /* Multiply LO(A) * LO(B). */
48173 t0 = gen_reg_rtx (wmode);
48174 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
48175
48176 /* Combine and shift the highparts into place. */
48177 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
48178 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
48179 1, OPTAB_DIRECT);
48180
48181 /* Combine high and low parts. */
48182 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
48183 return;
48184 }
48185 emit_insn (x);
48186 }
48187
48188 void
48189 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
48190 bool uns_p, bool high_p)
48191 {
48192 machine_mode wmode = GET_MODE (dest);
48193 machine_mode mode = GET_MODE (op1);
48194 rtx t1, t2, t3, t4, mask;
48195
48196 switch (mode)
48197 {
48198 case E_V4SImode:
48199 t1 = gen_reg_rtx (mode);
48200 t2 = gen_reg_rtx (mode);
48201 if (TARGET_XOP && !uns_p)
48202 {
48203 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
48204 shuffle the elements once so that all elements are in the right
48205 place for immediate use: { A C B D }. */
48206 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
48207 const1_rtx, GEN_INT (3)));
48208 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
48209 const1_rtx, GEN_INT (3)));
48210 }
48211 else
48212 {
48213 /* Put the elements into place for the multiply. */
48214 ix86_expand_vec_interleave (t1, op1, op1, high_p);
48215 ix86_expand_vec_interleave (t2, op2, op2, high_p);
48216 high_p = false;
48217 }
48218 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
48219 break;
48220
48221 case E_V8SImode:
48222 /* Shuffle the elements between the lanes. After this we
48223 have { A B E F | C D G H } for each operand. */
48224 t1 = gen_reg_rtx (V4DImode);
48225 t2 = gen_reg_rtx (V4DImode);
48226 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
48227 const0_rtx, const2_rtx,
48228 const1_rtx, GEN_INT (3)));
48229 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
48230 const0_rtx, const2_rtx,
48231 const1_rtx, GEN_INT (3)));
48232
48233 /* Shuffle the elements within the lanes. After this we
48234 have { A A B B | C C D D } or { E E F F | G G H H }. */
48235 t3 = gen_reg_rtx (V8SImode);
48236 t4 = gen_reg_rtx (V8SImode);
48237 mask = GEN_INT (high_p
48238 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
48239 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
48240 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
48241 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
48242
48243 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
48244 break;
48245
48246 case E_V8HImode:
48247 case E_V16HImode:
48248 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
48249 uns_p, OPTAB_DIRECT);
48250 t2 = expand_binop (mode,
48251 uns_p ? umul_highpart_optab : smul_highpart_optab,
48252 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
48253 gcc_assert (t1 && t2);
48254
48255 t3 = gen_reg_rtx (mode);
48256 ix86_expand_vec_interleave (t3, t1, t2, high_p);
48257 emit_move_insn (dest, gen_lowpart (wmode, t3));
48258 break;
48259
48260 case E_V16QImode:
48261 case E_V32QImode:
48262 case E_V32HImode:
48263 case E_V16SImode:
48264 case E_V64QImode:
48265 t1 = gen_reg_rtx (wmode);
48266 t2 = gen_reg_rtx (wmode);
48267 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
48268 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
48269
48270 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
48271 break;
48272
48273 default:
48274 gcc_unreachable ();
48275 }
48276 }
48277
48278 void
48279 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
48280 {
48281 rtx res_1, res_2, res_3, res_4;
48282
48283 res_1 = gen_reg_rtx (V4SImode);
48284 res_2 = gen_reg_rtx (V4SImode);
48285 res_3 = gen_reg_rtx (V2DImode);
48286 res_4 = gen_reg_rtx (V2DImode);
48287 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
48288 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
48289
48290 /* Move the results in element 2 down to element 1; we don't care
48291 what goes in elements 2 and 3. Then we can merge the parts
48292 back together with an interleave.
48293
48294 Note that two other sequences were tried:
48295 (1) Use interleaves at the start instead of psrldq, which allows
48296 us to use a single shufps to merge things back at the end.
48297 (2) Use shufps here to combine the two vectors, then pshufd to
48298 put the elements in the correct order.
48299 In both cases the cost of the reformatting stall was too high
48300 and the overall sequence slower. */
48301
48302 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
48303 const0_rtx, const2_rtx,
48304 const0_rtx, const0_rtx));
48305 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
48306 const0_rtx, const2_rtx,
48307 const0_rtx, const0_rtx));
48308 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
48309
48310 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
48311 }
48312
48313 void
48314 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
48315 {
48316 machine_mode mode = GET_MODE (op0);
48317 rtx t1, t2, t3, t4, t5, t6;
48318
48319 if (TARGET_AVX512DQ && mode == V8DImode)
48320 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
48321 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
48322 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
48323 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
48324 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
48325 else if (TARGET_XOP && mode == V2DImode)
48326 {
48327 /* op1: A,B,C,D, op2: E,F,G,H */
48328 op1 = gen_lowpart (V4SImode, op1);
48329 op2 = gen_lowpart (V4SImode, op2);
48330
48331 t1 = gen_reg_rtx (V4SImode);
48332 t2 = gen_reg_rtx (V4SImode);
48333 t3 = gen_reg_rtx (V2DImode);
48334 t4 = gen_reg_rtx (V2DImode);
48335
48336 /* t1: B,A,D,C */
48337 emit_insn (gen_sse2_pshufd_1 (t1, op1,
48338 GEN_INT (1),
48339 GEN_INT (0),
48340 GEN_INT (3),
48341 GEN_INT (2)));
48342
48343 /* t2: (B*E),(A*F),(D*G),(C*H) */
48344 emit_insn (gen_mulv4si3 (t2, t1, op2));
48345
48346 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
48347 emit_insn (gen_xop_phadddq (t3, t2));
48348
48349 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
48350 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
48351
48352 /* Multiply the lower parts and add everything together. */
48353 t5 = gen_reg_rtx (V2DImode);
48354 emit_insn (gen_vec_widen_umult_even_v4si (t5,
48355 gen_lowpart (V4SImode, op1),
48356 gen_lowpart (V4SImode, op2)));
48357 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
48358
48359 }
48360 else
48361 {
48362 machine_mode nmode;
48363 rtx (*umul) (rtx, rtx, rtx);
48364
48365 if (mode == V2DImode)
48366 {
48367 umul = gen_vec_widen_umult_even_v4si;
48368 nmode = V4SImode;
48369 }
48370 else if (mode == V4DImode)
48371 {
48372 umul = gen_vec_widen_umult_even_v8si;
48373 nmode = V8SImode;
48374 }
48375 else if (mode == V8DImode)
48376 {
48377 umul = gen_vec_widen_umult_even_v16si;
48378 nmode = V16SImode;
48379 }
48380 else
48381 gcc_unreachable ();
48382
48383
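      /* Writing each operand as hi * 2^32 + lo, the low 64 bits of the
	 product are lo1 * lo2 + 2^32 * (hi1 * lo2 + hi2 * lo1); the
	 hi1 * hi2 term is shifted out entirely.  */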
48384 /* Multiply low parts. */
48385 t1 = gen_reg_rtx (mode);
48386 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
48387
48388 /* Shift input vectors right 32 bits so we can multiply high parts. */
48389 t6 = GEN_INT (32);
48390 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
48391 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
48392
48393 /* Multiply high parts by low parts. */
48394 t4 = gen_reg_rtx (mode);
48395 t5 = gen_reg_rtx (mode);
48396 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
48397 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
48398
48399 /* Combine and shift the highparts back. */
48400 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
48401 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
48402
48403 /* Combine high and low parts. */
48404 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
48405 }
48406
48407 set_unique_reg_note (get_last_insn (), REG_EQUAL,
48408 gen_rtx_MULT (mode, op1, op2));
48409 }
48410
48411 /* Return 1 if control transfer instruction INSN
48412 should be encoded with bnd prefix.
48413 If insn is NULL then return 1 when control
48414 transfer instructions should be prefixed with
48415 bnd by default for current function. */
48416
48417 bool
48418 ix86_bnd_prefixed_insn_p (rtx insn)
48419 {
48420 /* For call insns check special flag. */
48421 if (insn && CALL_P (insn))
48422 {
48423 rtx call = get_call_rtx_from (insn);
48424 if (call)
48425 return CALL_EXPR_WITH_BOUNDS_P (call);
48426 }
48427
48428 /* All other insns are prefixed only if function is instrumented. */
48429 return chkp_function_instrumented_p (current_function_decl);
48430 }
48431
48432 /* Return 1 if control transfer instruction INSN
48433 should be encoded with notrack prefix. */
48434
48435 static bool
48436 ix86_notrack_prefixed_insn_p (rtx insn)
48437 {
48438 if (!insn || !((flag_cf_protection & CF_BRANCH) && TARGET_IBT))
48439 return false;
48440
48441 if (CALL_P (insn))
48442 {
48443 rtx call = get_call_rtx_from (insn);
48444 gcc_assert (call != NULL_RTX);
48445 rtx addr = XEXP (call, 0);
48446
48447 /* Do not emit 'notrack' if it's not an indirect call. */
48448 if (MEM_P (addr)
48449 && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
48450 return false;
48451 else
48452 return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
48453 }
48454
48455 if (JUMP_P (insn) && !flag_cet_switch)
48456 {
48457 rtx target = JUMP_LABEL (insn);
48458 if (target == NULL_RTX || ANY_RETURN_P (target))
48459 return false;
48460
48461 /* Check the jump is a switch table. */
48462 rtx_insn *label = as_a<rtx_insn *> (target);
48463 rtx_insn *table = next_insn (label);
48464 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
48465 return false;
48466 else
48467 return true;
48468 }
48469 return false;
48470 }
48471
48472 /* Calculate integer abs() using only SSE2 instructions. */
48473
48474 void
48475 ix86_expand_sse2_abs (rtx target, rtx input)
48476 {
48477 machine_mode mode = GET_MODE (target);
48478 rtx tmp0, tmp1, x;
48479
48480 switch (mode)
48481 {
48482 /* For 32-bit signed integer X, the best way to calculate the absolute
48483 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
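    /* For example, with W == 32 and X == -5: X >> 31 == -1,
       (-1 ^ -5) == 4, and 4 - (-1) == 5.  */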
48484 case E_V4SImode:
48485 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
48486 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
48487 NULL, 0, OPTAB_DIRECT);
48488 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
48489 NULL, 0, OPTAB_DIRECT);
48490 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
48491 target, 0, OPTAB_DIRECT);
48492 break;
48493
48494 /* For 16-bit signed integer X, the best way to calculate the absolute
48495 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
48496 case E_V8HImode:
48497 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48498
48499 x = expand_simple_binop (mode, SMAX, tmp0, input,
48500 target, 0, OPTAB_DIRECT);
48501 break;
48502
48503 /* For 8-bit signed integer X, the best way to calculate the absolute
48504 value of X is min ((unsigned char) X, (unsigned char) (-X)),
48505 as SSE2 provides the PMINUB insn. */
48506 case E_V16QImode:
48507 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48508
48509 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
48510 target, 0, OPTAB_DIRECT);
48511 break;
48512
48513 default:
48514 gcc_unreachable ();
48515 }
48516
48517 if (x != target)
48518 emit_move_insn (target, x);
48519 }
48520
48521 /* Expand an extract from a vector register through pextr insn.
48522 Return true if successful. */
48523
48524 bool
48525 ix86_expand_pextr (rtx *operands)
48526 {
48527 rtx dst = operands[0];
48528 rtx src = operands[1];
48529
48530 unsigned int size = INTVAL (operands[2]);
48531 unsigned int pos = INTVAL (operands[3]);
48532
48533 if (SUBREG_P (dst))
48534 {
48535 /* Reject non-lowpart subregs. */
48536 if (SUBREG_BYTE (dst) > 0)
48537 return false;
48538 dst = SUBREG_REG (dst);
48539 }
48540
48541 if (SUBREG_P (src))
48542 {
48543 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
48544 src = SUBREG_REG (src);
48545 }
48546
48547 switch (GET_MODE (src))
48548 {
48549 case E_V16QImode:
48550 case E_V8HImode:
48551 case E_V4SImode:
48552 case E_V2DImode:
48553 case E_V1TImode:
48554 case E_TImode:
48555 {
48556 machine_mode srcmode, dstmode;
48557 rtx d, pat;
48558
48559 if (!int_mode_for_size (size, 0).exists (&dstmode))
48560 return false;
48561
48562 switch (dstmode)
48563 {
48564 case E_QImode:
48565 if (!TARGET_SSE4_1)
48566 return false;
48567 srcmode = V16QImode;
48568 break;
48569
48570 case E_HImode:
48571 if (!TARGET_SSE2)
48572 return false;
48573 srcmode = V8HImode;
48574 break;
48575
48576 case E_SImode:
48577 if (!TARGET_SSE4_1)
48578 return false;
48579 srcmode = V4SImode;
48580 break;
48581
48582 case E_DImode:
48583 gcc_assert (TARGET_64BIT);
48584 if (!TARGET_SSE4_1)
48585 return false;
48586 srcmode = V2DImode;
48587 break;
48588
48589 default:
48590 return false;
48591 }
48592
48593 /* Reject extractions from misaligned positions. */
48594 if (pos & (size-1))
48595 return false;
48596
48597 if (GET_MODE (dst) == dstmode)
48598 d = dst;
48599 else
48600 d = gen_reg_rtx (dstmode);
48601
48602 /* Construct insn pattern. */
48603 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
48604 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
48605
48606 /* Let the rtl optimizers know about the zero extension performed. */
48607 if (dstmode == QImode || dstmode == HImode)
48608 {
48609 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
48610 d = gen_lowpart (SImode, d);
48611 }
48612
48613 emit_insn (gen_rtx_SET (d, pat));
48614
48615 if (d != dst)
48616 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
48617 return true;
48618 }
48619
48620 default:
48621 return false;
48622 }
48623 }
48624
48625 /* Expand an insert into a vector register through pinsr insn.
48626 Return true if successful. */
48627
48628 bool
48629 ix86_expand_pinsr (rtx *operands)
48630 {
48631 rtx dst = operands[0];
48632 rtx src = operands[3];
48633
48634 unsigned int size = INTVAL (operands[1]);
48635 unsigned int pos = INTVAL (operands[2]);
48636
48637 if (SUBREG_P (dst))
48638 {
48639 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
48640 dst = SUBREG_REG (dst);
48641 }
48642
48643 switch (GET_MODE (dst))
48644 {
48645 case E_V16QImode:
48646 case E_V8HImode:
48647 case E_V4SImode:
48648 case E_V2DImode:
48649 case E_V1TImode:
48650 case E_TImode:
48651 {
48652 machine_mode srcmode, dstmode;
48653 rtx (*pinsr)(rtx, rtx, rtx, rtx);
48654 rtx d;
48655
48656 if (!int_mode_for_size (size, 0).exists (&srcmode))
48657 return false;
48658
48659 switch (srcmode)
48660 {
48661 case E_QImode:
48662 if (!TARGET_SSE4_1)
48663 return false;
48664 dstmode = V16QImode;
48665 pinsr = gen_sse4_1_pinsrb;
48666 break;
48667
48668 case E_HImode:
48669 if (!TARGET_SSE2)
48670 return false;
48671 dstmode = V8HImode;
48672 pinsr = gen_sse2_pinsrw;
48673 break;
48674
48675 case E_SImode:
48676 if (!TARGET_SSE4_1)
48677 return false;
48678 dstmode = V4SImode;
48679 pinsr = gen_sse4_1_pinsrd;
48680 break;
48681
48682 case E_DImode:
48683 gcc_assert (TARGET_64BIT);
48684 if (!TARGET_SSE4_1)
48685 return false;
48686 dstmode = V2DImode;
48687 pinsr = gen_sse4_1_pinsrq;
48688 break;
48689
48690 default:
48691 return false;
48692 }
48693
48694 /* Reject insertions to misaligned positions. */
48695 if (pos & (size-1))
48696 return false;
48697
48698 if (SUBREG_P (src))
48699 {
48700 unsigned int srcpos = SUBREG_BYTE (src);
48701
48702 if (srcpos > 0)
48703 {
48704 rtx extr_ops[4];
48705
48706 extr_ops[0] = gen_reg_rtx (srcmode);
48707 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
48708 extr_ops[2] = GEN_INT (size);
48709 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
48710
48711 if (!ix86_expand_pextr (extr_ops))
48712 return false;
48713
48714 src = extr_ops[0];
48715 }
48716 else
48717 src = gen_lowpart (srcmode, SUBREG_REG (src));
48718 }
48719
48720 if (GET_MODE (dst) == dstmode)
48721 d = dst;
48722 else
48723 d = gen_reg_rtx (dstmode);
48724
48725 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
48726 gen_lowpart (srcmode, src),
48727 GEN_INT (1 << (pos / size))));
48728 if (d != dst)
48729 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
48730 return true;
48731 }
48732
48733 default:
48734 return false;
48735 }
48736 }
48737 \f
48738 /* This function returns the calling-ABI-specific va_list type node.
48739 It returns the FNDECL-specific va_list type. */
48740
48741 static tree
48742 ix86_fn_abi_va_list (tree fndecl)
48743 {
48744 if (!TARGET_64BIT)
48745 return va_list_type_node;
48746 gcc_assert (fndecl != NULL_TREE);
48747
48748 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
48749 return ms_va_list_type_node;
48750 else
48751 return sysv_va_list_type_node;
48752 }
48753
48754 /* Returns the canonical va_list type specified by TYPE. If there
48755 is no valid TYPE provided, it returns NULL_TREE. */
48756
48757 static tree
48758 ix86_canonical_va_list_type (tree type)
48759 {
48760 if (TARGET_64BIT)
48761 {
48762 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
48763 return ms_va_list_type_node;
48764
48765 if ((TREE_CODE (type) == ARRAY_TYPE
48766 && integer_zerop (array_type_nelts (type)))
48767 || POINTER_TYPE_P (type))
48768 {
48769 tree elem_type = TREE_TYPE (type);
48770 if (TREE_CODE (elem_type) == RECORD_TYPE
48771 && lookup_attribute ("sysv_abi va_list",
48772 TYPE_ATTRIBUTES (elem_type)))
48773 return sysv_va_list_type_node;
48774 }
48775
48776 return NULL_TREE;
48777 }
48778
48779 return std_canonical_va_list_type (type);
48780 }
48781
48782 /* Iterate through the target-specific builtin types for va_list.
48783 IDX denotes the iterator, *PTREE is set to the result type of
48784 the va_list builtin, and *PNAME to its internal type.
48785 Returns zero if there is no element for this index, otherwise
48786 IDX should be increased upon the next call.
48787 Note, do not iterate a base builtin's name like __builtin_va_list.
48788 Used from c_common_nodes_and_builtins. */
48789
48790 static int
48791 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
48792 {
48793 if (TARGET_64BIT)
48794 {
48795 switch (idx)
48796 {
48797 default:
48798 break;
48799
48800 case 0:
48801 *ptree = ms_va_list_type_node;
48802 *pname = "__builtin_ms_va_list";
48803 return 1;
48804
48805 case 1:
48806 *ptree = sysv_va_list_type_node;
48807 *pname = "__builtin_sysv_va_list";
48808 return 1;
48809 }
48810 }
48811
48812 return 0;
48813 }
48814
48815 #undef TARGET_SCHED_DISPATCH
48816 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
48817 #undef TARGET_SCHED_DISPATCH_DO
48818 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
48819 #undef TARGET_SCHED_REASSOCIATION_WIDTH
48820 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
48821 #undef TARGET_SCHED_REORDER
48822 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
48823 #undef TARGET_SCHED_ADJUST_PRIORITY
48824 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
48825 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
48826 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
48827 ix86_dependencies_evaluation_hook
48828
48829
48830 /* Implementation of the reassociation_width target hook, used by the
48831 reassoc phase to identify the parallelism level in a reassociated
48832 tree. The statement's tree_code is passed in OP. The operands'
48833 type is passed in MODE. */
48834
48835 static int
48836 ix86_reassociation_width (unsigned int op, machine_mode mode)
48837 {
48838 int width = 1;
48839 /* Vector part. */
48840 if (VECTOR_MODE_P (mode))
48841 {
48842 int div = 1;
48843 if (INTEGRAL_MODE_P (mode))
48844 width = ix86_cost->reassoc_vec_int;
48845 else if (FLOAT_MODE_P (mode))
48846 width = ix86_cost->reassoc_vec_fp;
48847
48848 if (width == 1)
48849 return 1;
48850
48851 /* Integer vector instructions execute in the FP unit
48852 and can execute 3 additions and one multiplication per cycle. */
48853 if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
48854 && op != PLUS && op != MINUS)
48855 return 1;
48856
48857 /* Account for targets that split wide vectors into multiple parts. */
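      /* For example, on a TARGET_AVX128_OPTIMAL core a 256-bit vector mode
	 gets div == 2, halving (rounding up) the reported width.  */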
48858 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
48859 div = GET_MODE_BITSIZE (mode) / 128;
48860 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
48861 div = GET_MODE_BITSIZE (mode) / 64;
48862 width = (width + div - 1) / div;
48863 }
48864 /* Scalar part. */
48865 else if (INTEGRAL_MODE_P (mode))
48866 width = ix86_cost->reassoc_int;
48867 else if (FLOAT_MODE_P (mode))
48868 width = ix86_cost->reassoc_fp;
48869
48870 /* Avoid using too many registers in 32bit mode. */
48871 if (!TARGET_64BIT && width > 2)
48872 width = 2;
48873 return width;
48874 }
48875
48876 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
48877 place emms and femms instructions. */
48878
48879 static machine_mode
48880 ix86_preferred_simd_mode (scalar_mode mode)
48881 {
48882 if (!TARGET_SSE)
48883 return word_mode;
48884
48885 switch (mode)
48886 {
48887 case E_QImode:
48888 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
48889 return V64QImode;
48890 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48891 return V32QImode;
48892 else
48893 return V16QImode;
48894
48895 case E_HImode:
48896 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
48897 return V32HImode;
48898 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48899 return V16HImode;
48900 else
48901 return V8HImode;
48902
48903 case E_SImode:
48904 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48905 return V16SImode;
48906 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48907 return V8SImode;
48908 else
48909 return V4SImode;
48910
48911 case E_DImode:
48912 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48913 return V8DImode;
48914 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48915 return V4DImode;
48916 else
48917 return V2DImode;
48918
48919 case E_SFmode:
48920 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48921 return V16SFmode;
48922 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48923 return V8SFmode;
48924 else
48925 return V4SFmode;
48926
48927 case E_DFmode:
48928 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48929 return V8DFmode;
48930 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48931 return V4DFmode;
48932 else if (TARGET_SSE2)
48933 return V2DFmode;
48934 /* FALLTHRU */
48935
48936 default:
48937 return word_mode;
48938 }
48939 }
48940
48941 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
48942 vectors. If AVX512F is enabled then try vectorizing with 512bit,
48943 256bit and 128bit vectors. */
48944
48945 static unsigned int
48946 ix86_autovectorize_vector_sizes (void)
48947 {
48948 unsigned int bytesizes = 0;
48949
48950 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48951 bytesizes |= (64 | 32 | 16);
48952 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48953 bytesizes |= (32 | 16);
48954
48955 return bytesizes;
48956 }
48957
48958 /* Implementation of targetm.vectorize.get_mask_mode. */
48959
48960 static opt_machine_mode
48961 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
48962 {
48963 unsigned elem_size = vector_size / nunits;
48964
48965 /* Scalar mask case. */
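  /* For example, with AVX512BW a 64-byte vector of QImode elements gets a
     DImode mask, one bit per element.  */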
48966 if ((TARGET_AVX512F && vector_size == 64)
48967 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
48968 {
48969 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
48970 return smallest_int_mode_for_size (nunits);
48971 }
48972
48973 scalar_int_mode elem_mode
48974 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
48975
48976 gcc_assert (elem_size * nunits == vector_size);
48977
48978 return mode_for_vector (elem_mode, nunits);
48979 }
48980
48981 \f
48982
48983 /* Return class of registers which could be used for pseudo of MODE
48984 and of class RCLASS for spilling instead of memory. Return NO_REGS
48985 if it is not possible or not profitable. */
48986
48987 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
48988
48989 static reg_class_t
48990 ix86_spill_class (reg_class_t rclass, machine_mode mode)
48991 {
48992 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
48993 && TARGET_SSE2
48994 && TARGET_INTER_UNIT_MOVES_TO_VEC
48995 && TARGET_INTER_UNIT_MOVES_FROM_VEC
48996 && (mode == SImode || (TARGET_64BIT && mode == DImode))
48997 && INTEGER_CLASS_P (rclass))
48998 return ALL_SSE_REGS;
48999 return NO_REGS;
49000 }
49001
49002 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
49003 but returns a lower bound. */
49004
49005 static unsigned int
49006 ix86_max_noce_ifcvt_seq_cost (edge e)
49007 {
49008 bool predictable_p = predictable_edge_p (e);
49009
49010 enum compiler_param param
49011 = (predictable_p
49012 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
49013 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
49014
49015 /* If we have a parameter set, use that, otherwise take a guess using
49016 BRANCH_COST. */
49017 if (global_options_set.x_param_values[param])
49018 return PARAM_VALUE (param);
49019 else
49020 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
49021 }
49022
49023 /* Return true if SEQ is a good candidate as a replacement for the
49024 if-convertible sequence described in IF_INFO. */
49025
49026 static bool
49027 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
49028 {
49029 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
49030 {
49031 int cmov_cnt = 0;
49032 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
49033 Maybe we should allow even more conditional moves as long as they
49034 are used far enough not to stall the CPU, or also consider
49035 IF_INFO->TEST_BB succ edge probabilities. */
49036 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
49037 {
49038 rtx set = single_set (insn);
49039 if (!set)
49040 continue;
49041 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
49042 continue;
49043 rtx src = SET_SRC (set);
49044 machine_mode mode = GET_MODE (src);
49045 if (GET_MODE_CLASS (mode) != MODE_INT
49046 && GET_MODE_CLASS (mode) != MODE_FLOAT)
49047 continue;
49048 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
49049 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
49050 continue;
49051 /* insn is CMOV or FCMOV. */
49052 if (++cmov_cnt > 1)
49053 return false;
49054 }
49055 }
49056 return default_noce_conversion_profitable_p (seq, if_info);
49057 }
49058
49059 /* Implement targetm.vectorize.init_cost. */
49060
49061 static void *
49062 ix86_init_cost (struct loop *)
49063 {
49064 unsigned *cost = XNEWVEC (unsigned, 3);
49065 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
49066 return cost;
49067 }
49068
49069 /* Implement targetm.vectorize.add_stmt_cost. */
49070
49071 static unsigned
49072 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
49073 struct _stmt_vec_info *stmt_info, int misalign,
49074 enum vect_cost_model_location where)
49075 {
49076 unsigned *cost = (unsigned *) data;
49077 unsigned retval = 0;
49078
49079 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
49080 int stmt_cost = - 1;
49081
49082 if ((kind == vector_stmt || kind == scalar_stmt)
49083 && stmt_info
49084 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
49085 {
49086 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
49087 bool fp = false;
49088 machine_mode mode = TImode;
49089
49090 if (vectype != NULL)
49091 {
49092 fp = FLOAT_TYPE_P (vectype);
49093 mode = TYPE_MODE (vectype);
49094 }
49095 /*machine_mode inner_mode = mode;
49096 if (VECTOR_MODE_P (mode))
49097 inner_mode = GET_MODE_INNER (mode);*/
49098
49099 switch (subcode)
49100 {
49101 case PLUS_EXPR:
49102 case POINTER_PLUS_EXPR:
49103 case MINUS_EXPR:
49104 if (kind == scalar_stmt)
49105 {
49106 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
49107 stmt_cost = ix86_cost->addss;
49108 else if (X87_FLOAT_MODE_P (mode))
49109 stmt_cost = ix86_cost->fadd;
49110 else
49111 stmt_cost = ix86_cost->add;
49112 }
49113 else
49114 stmt_cost = ix86_vec_cost (mode,
49115 fp ? ix86_cost->addss
49116 : ix86_cost->sse_op,
49117 true);
49118 break;
49119
49120 case MULT_EXPR:
49121 case WIDEN_MULT_EXPR:
49122 case MULT_HIGHPART_EXPR:
49123 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
49124 break;
49125 case FMA_EXPR:
49126 stmt_cost = ix86_vec_cost (mode,
49127 mode == SFmode ? ix86_cost->fmass
49128 : ix86_cost->fmasd,
49129 true);
49130 break;
49131 case NEGATE_EXPR:
49132 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
49133 stmt_cost = ix86_cost->sse_op;
49134 else if (X87_FLOAT_MODE_P (mode))
49135 stmt_cost = ix86_cost->fchs;
49136 else if (VECTOR_MODE_P (mode))
49137 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op, true);
49138 else
49139 stmt_cost = ix86_cost->add;
49140 break;
49141 case TRUNC_DIV_EXPR:
49142 case CEIL_DIV_EXPR:
49143 case FLOOR_DIV_EXPR:
49144 case ROUND_DIV_EXPR:
49145 case TRUNC_MOD_EXPR:
49146 case CEIL_MOD_EXPR:
49147 case FLOOR_MOD_EXPR:
49148 case RDIV_EXPR:
49149 case ROUND_MOD_EXPR:
49150 case EXACT_DIV_EXPR:
49151 stmt_cost = ix86_division_cost (ix86_cost, mode);
49152 break;
49153
49154 case RSHIFT_EXPR:
49155 case LSHIFT_EXPR:
49156 case LROTATE_EXPR:
49157 case RROTATE_EXPR:
49158 {
49159 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
49160 stmt_cost = ix86_shift_rotate_cost
49161 (ix86_cost, mode,
49162 TREE_CODE (op2) == INTEGER_CST,
49163 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
49164 true, false, false, NULL, NULL);
49165 }
49166 break;
49167 case NOP_EXPR:
49168 stmt_cost = 0;
49169 break;
49170
49171 case BIT_IOR_EXPR:
49172 case ABS_EXPR:
49173 case MIN_EXPR:
49174 case MAX_EXPR:
49175 case BIT_XOR_EXPR:
49176 case BIT_AND_EXPR:
49177 case BIT_NOT_EXPR:
49178 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
49179 stmt_cost = ix86_cost->sse_op;
49180 else if (VECTOR_MODE_P (mode))
49181 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op, true);
49182 else
49183 stmt_cost = ix86_cost->add;
49184 break;
49185 default:
49186 break;
49187 }
49188 }
49189 if (stmt_cost == -1)
49190 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
49191
49192 /* Penalize DFmode vector operations for Bonnell. */
49193 if (TARGET_BONNELL && kind == vector_stmt
49194 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
49195 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
49196
49197 /* Statements in an inner loop relative to the loop being
49198 vectorized are weighted more heavily. The value here is
49199 arbitrary and could potentially be improved with analysis. */
49200 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
49201 count *= 50; /* FIXME. */
49202
49203 retval = (unsigned) (count * stmt_cost);
49204
49205 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
49206 for Silvermont as it has an out-of-order integer pipeline and can execute
49207 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
49208 if ((TARGET_SILVERMONT || TARGET_INTEL)
49209 && stmt_info && stmt_info->stmt)
49210 {
49211 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
49212 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
49213 retval = (retval * 17) / 10;
49214 }
49215
49216 cost[where] += retval;
49217
49218 return retval;
49219 }
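/* Worked example (editorial addition; the concrete numbers are assumed for
   illustration): for a single vector statement with stmt_cost == 2 and
   count == 1, retval starts as 2; on -mtune=silvermont with an integer
   result type the scaling above yields (2 * 17) / 10 == 3 in integer
   arithmetic, which approximates the intended 1.7x penalty for the
   in-order SIMD pipeline.  */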
49220
49221 /* Implement targetm.vectorize.finish_cost. */
49222
49223 static void
49224 ix86_finish_cost (void *data, unsigned *prologue_cost,
49225 unsigned *body_cost, unsigned *epilogue_cost)
49226 {
49227 unsigned *cost = (unsigned *) data;
49228 *prologue_cost = cost[vect_prologue];
49229 *body_cost = cost[vect_body];
49230 *epilogue_cost = cost[vect_epilogue];
49231 }
49232
49233 /* Implement targetm.vectorize.destroy_cost_data. */
49234
49235 static void
49236 ix86_destroy_cost_data (void *data)
49237 {
49238 free (data);
49239 }
49240
49241 /* Validate target specific memory model bits in VAL. */
49242
49243 static unsigned HOST_WIDE_INT
49244 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
49245 {
49246 enum memmodel model = memmodel_from_int (val);
49247 bool strong;
49248
49249 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
49250 |MEMMODEL_MASK)
49251 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
49252 {
49253 warning (OPT_Winvalid_memory_model,
49254 "unknown architecture specific memory model");
49255 return MEMMODEL_SEQ_CST;
49256 }
49257 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
49258 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
49259 {
49260 warning (OPT_Winvalid_memory_model,
49261 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
49262 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
49263 }
49264 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
49265 {
49266 warning (OPT_Winvalid_memory_model,
49267 "HLE_RELEASE not used with RELEASE or stronger memory model");
49268 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
49269 }
49270 return val;
49271 }
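/* Usage sketch (editorial addition): the HLE bits validated above are the
   ones user code combines with a C11-style memory model, e.g.

     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining IX86_HLE_RELEASE with a model weaker than RELEASE (or setting
   both HLE bits at once) triggers one of the warnings above and the model
   is forced to SEQ_CST, keeping the HLE bit in the former case.  */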
49272
49273 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
49274 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
49275 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
49276 or number of vecsize_mangle variants that should be emitted. */
49277
49278 static int
49279 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
49280 struct cgraph_simd_clone *clonei,
49281 tree base_type, int num)
49282 {
49283 int ret = 1;
49284
49285 if (clonei->simdlen
49286 && (clonei->simdlen < 2
49287 || clonei->simdlen > 1024
49288 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
49289 {
49290 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49291 "unsupported simdlen %d", clonei->simdlen);
49292 return 0;
49293 }
49294
49295 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
49296 if (TREE_CODE (ret_type) != VOID_TYPE)
49297 switch (TYPE_MODE (ret_type))
49298 {
49299 case E_QImode:
49300 case E_HImode:
49301 case E_SImode:
49302 case E_DImode:
49303 case E_SFmode:
49304 case E_DFmode:
49305 /* case E_SCmode: */
49306 /* case E_DCmode: */
49307 break;
49308 default:
49309 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49310 "unsupported return type %qT for simd\n", ret_type);
49311 return 0;
49312 }
49313
49314 tree t;
49315 int i;
49316
49317 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
49318 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
49319 switch (TYPE_MODE (TREE_TYPE (t)))
49320 {
49321 case E_QImode:
49322 case E_HImode:
49323 case E_SImode:
49324 case E_DImode:
49325 case E_SFmode:
49326 case E_DFmode:
49327 /* case E_SCmode: */
49328 /* case E_DCmode: */
49329 break;
49330 default:
49331 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49332 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
49333 return 0;
49334 }
49335
49336 if (!TREE_PUBLIC (node->decl))
49337 {
49338 /* If the function isn't exported, we can pick just one ISA
49339 for the clones. */
49340 if (TARGET_AVX512F)
49341 clonei->vecsize_mangle = 'e';
49342 else if (TARGET_AVX2)
49343 clonei->vecsize_mangle = 'd';
49344 else if (TARGET_AVX)
49345 clonei->vecsize_mangle = 'c';
49346 else
49347 clonei->vecsize_mangle = 'b';
49348 ret = 1;
49349 }
49350 else
49351 {
49352 clonei->vecsize_mangle = "bcde"[num];
49353 ret = 4;
49354 }
49355 clonei->mask_mode = VOIDmode;
49356 switch (clonei->vecsize_mangle)
49357 {
49358 case 'b':
49359 clonei->vecsize_int = 128;
49360 clonei->vecsize_float = 128;
49361 break;
49362 case 'c':
49363 clonei->vecsize_int = 128;
49364 clonei->vecsize_float = 256;
49365 break;
49366 case 'd':
49367 clonei->vecsize_int = 256;
49368 clonei->vecsize_float = 256;
49369 break;
49370 case 'e':
49371 clonei->vecsize_int = 512;
49372 clonei->vecsize_float = 512;
49373 if (TYPE_MODE (base_type) == QImode)
49374 clonei->mask_mode = DImode;
49375 else
49376 clonei->mask_mode = SImode;
49377 break;
49378 }
49379 if (clonei->simdlen == 0)
49380 {
49381 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
49382 clonei->simdlen = clonei->vecsize_int;
49383 else
49384 clonei->simdlen = clonei->vecsize_float;
49385 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
49386 }
49387 else if (clonei->simdlen > 16)
49388 {
49389 /* For compatibility with ICC, use the same upper bounds
49390 for simdlen. In particular, for CTYPE below, use the return type,
49391 unless the function returns void, in which case use the characteristic
49392 type. If it is possible for the given SIMDLEN to pass a CTYPE value
49393 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
49394 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
49395 emit the corresponding clone. */
49396 tree ctype = ret_type;
49397 if (TREE_CODE (ret_type) == VOID_TYPE)
49398 ctype = base_type;
49399 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
49400 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
49401 cnt /= clonei->vecsize_int;
49402 else
49403 cnt /= clonei->vecsize_float;
49404 if (cnt > (TARGET_64BIT ? 16 : 8))
49405 {
49406 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49407 "unsupported simdlen %d", clonei->simdlen);
49408 return 0;
49409 }
49410 }
49411 return ret;
49412 }
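/* Illustrative example (editorial addition; the declaration below is a
   hypothetical input): for an exported function declared as

     #pragma omp declare simd
     int f (int x);

   the code above returns ret == 4 and emits clones mangled 'b', 'c', 'd'
   and 'e' (roughly SSE2, AVX, AVX2 and AVX-512F variants).  With SImode as
   the characteristic type, simdlen is derived as vecsize_int / 32, giving
   4, 4, 8 and 16 lanes respectively.  */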
49413
49414 /* Add target attribute to SIMD clone NODE if needed. */
49415
49416 static void
49417 ix86_simd_clone_adjust (struct cgraph_node *node)
49418 {
49419 const char *str = NULL;
49420 gcc_assert (node->decl == cfun->decl);
49421 switch (node->simdclone->vecsize_mangle)
49422 {
49423 case 'b':
49424 if (!TARGET_SSE2)
49425 str = "sse2";
49426 break;
49427 case 'c':
49428 if (!TARGET_AVX)
49429 str = "avx";
49430 break;
49431 case 'd':
49432 if (!TARGET_AVX2)
49433 str = "avx2";
49434 break;
49435 case 'e':
49436 if (!TARGET_AVX512F)
49437 str = "avx512f";
49438 break;
49439 default:
49440 gcc_unreachable ();
49441 }
49442 if (str == NULL)
49443 return;
49444 push_cfun (NULL);
49445 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
49446 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
49447 gcc_assert (ok);
49448 pop_cfun ();
49449 ix86_reset_previous_fndecl ();
49450 ix86_set_current_function (node->decl);
49451 }
49452
49453 /* If SIMD clone NODE can't be used in a vectorized loop
49454 in current function, return -1, otherwise return a badness of using it
49455 (0 if it is most desirable from vecsize_mangle point of view, 1
49456 slightly less desirable, etc.). */
49457
49458 static int
49459 ix86_simd_clone_usable (struct cgraph_node *node)
49460 {
49461 switch (node->simdclone->vecsize_mangle)
49462 {
49463 case 'b':
49464 if (!TARGET_SSE2)
49465 return -1;
49466 if (!TARGET_AVX)
49467 return 0;
49468 return TARGET_AVX2 ? 2 : 1;
49469 case 'c':
49470 if (!TARGET_AVX)
49471 return -1;
49472 return TARGET_AVX2 ? 1 : 0;
49473 case 'd':
49474 if (!TARGET_AVX2)
49475 return -1;
49476 return 0;
49477 case 'e':
49478 if (!TARGET_AVX512F)
49479 return -1;
49480 return 0;
49481 default:
49482 gcc_unreachable ();
49483 }
49484 }
49485
49486 /* This function adjusts the unroll factor based on
49487 the hardware capabilities. For example, bdver3 has
49488 a loop buffer which makes unrolling of smaller
49489 loops less important. This function decides the
49490 unroll factor using the number of memory references
49491 (the value 32 is used) as a heuristic. */
49492
49493 static unsigned
49494 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
49495 {
49496 basic_block *bbs;
49497 rtx_insn *insn;
49498 unsigned i;
49499 unsigned mem_count = 0;
49500
49501 if (!TARGET_ADJUST_UNROLL)
49502 return nunroll;
49503
49504 /* Count the number of memory references within the loop body.
49505 This value determines the unrolling factor for bdver3 and bdver4
49506 architectures. */
49507 subrtx_iterator::array_type array;
49508 bbs = get_loop_body (loop);
49509 for (i = 0; i < loop->num_nodes; i++)
49510 FOR_BB_INSNS (bbs[i], insn)
49511 if (NONDEBUG_INSN_P (insn))
49512 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
49513 if (const_rtx x = *iter)
49514 if (MEM_P (x))
49515 {
49516 machine_mode mode = GET_MODE (x);
49517 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
49518 if (n_words > 4)
49519 mem_count += 2;
49520 else
49521 mem_count += 1;
49522 }
49523 free (bbs);
49524
49525 if (mem_count && mem_count <= 32)
49526 return 32 / mem_count;
49527
49528 return nunroll;
49529 }
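/* Worked example (editorial addition; the loop shape is assumed for
   illustration): on bdver3/bdver4, a loop body containing eight word-sized
   memory references gives mem_count == 8, so the function returns
   32 / 8 == 4 regardless of the incoming NUNROLL; a loop with more than 32
   memory references (or none at all) keeps NUNROLL unchanged.  */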
49530
49531
49532 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
49533
49534 static bool
49535 ix86_float_exceptions_rounding_supported_p (void)
49536 {
49537 /* For x87 floating point with standard excess precision handling,
49538 there is no adddf3 pattern (since x87 floating point only has
49539 XFmode operations) so the default hook implementation gets this
49540 wrong. */
49541 return TARGET_80387 || TARGET_SSE_MATH;
49542 }
49543
49544 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
49545
49546 static void
49547 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
49548 {
49549 if (!TARGET_80387 && !TARGET_SSE_MATH)
49550 return;
49551 tree exceptions_var = create_tmp_var_raw (integer_type_node);
49552 if (TARGET_80387)
49553 {
49554 tree fenv_index_type = build_index_type (size_int (6));
49555 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
49556 tree fenv_var = create_tmp_var_raw (fenv_type);
49557 TREE_ADDRESSABLE (fenv_var) = 1;
49558 tree fenv_ptr = build_pointer_type (fenv_type);
49559 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
49560 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
49561 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
49562 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
49563 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
49564 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
49565 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
49566 tree hold_fnclex = build_call_expr (fnclex, 0);
49567 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
49568 NULL_TREE, NULL_TREE);
49569 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
49570 hold_fnclex);
49571 *clear = build_call_expr (fnclex, 0);
49572 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
49573 tree fnstsw_call = build_call_expr (fnstsw, 0);
49574 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
49575 sw_var, fnstsw_call);
49576 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
49577 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
49578 exceptions_var, exceptions_x87);
49579 *update = build2 (COMPOUND_EXPR, integer_type_node,
49580 sw_mod, update_mod);
49581 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
49582 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
49583 }
49584 if (TARGET_SSE_MATH)
49585 {
49586 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
49587 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
49588 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
49589 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
49590 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
49591 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
49592 mxcsr_orig_var, stmxcsr_hold_call);
49593 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
49594 mxcsr_orig_var,
49595 build_int_cst (unsigned_type_node, 0x1f80));
49596 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
49597 build_int_cst (unsigned_type_node, 0xffffffc0));
49598 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
49599 mxcsr_mod_var, hold_mod_val);
49600 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
49601 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
49602 hold_assign_orig, hold_assign_mod);
49603 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
49604 ldmxcsr_hold_call);
49605 if (*hold)
49606 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
49607 else
49608 *hold = hold_all;
49609 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
49610 if (*clear)
49611 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
49612 ldmxcsr_clear_call);
49613 else
49614 *clear = ldmxcsr_clear_call;
49615 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
49616 tree exceptions_sse = fold_convert (integer_type_node,
49617 stxmcsr_update_call);
49618 if (*update)
49619 {
49620 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
49621 exceptions_var, exceptions_sse);
49622 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
49623 exceptions_var, exceptions_mod);
49624 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
49625 exceptions_assign);
49626 }
49627 else
49628 *update = build2 (MODIFY_EXPR, integer_type_node,
49629 exceptions_var, exceptions_sse);
49630 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
49631 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
49632 ldmxcsr_update_call);
49633 }
49634 tree atomic_feraiseexcept
49635 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
49636 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
49637 1, exceptions_var);
49638 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
49639 atomic_feraiseexcept_call);
49640 }
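/* Rough expansion sketch (editorial addition, approximating the trees built
   above; it is not a literal dump): with both x87 and SSE math enabled the
   three sequences behave roughly like

     hold:    fnstenv (&fenv); fnclex ();
              mxcsr_orig = stmxcsr ();
              ldmxcsr ((mxcsr_orig | 0x1f80) & 0xffffffc0);
     clear:   fnclex (); ldmxcsr (mxcsr_mod);
     update:  exceptions = fnstsw (); fldenv (&fenv);
              exceptions |= stmxcsr (); ldmxcsr (mxcsr_orig);
              __atomic_feraiseexcept (exceptions);

   where 0x1f80 sets all SSE exception mask bits and 0xffffffc0 clears the
   pending exception flag bits in MXCSR.  */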
49641
49642 /* Return the mode to be used for bounds, or VOIDmode
49643 if bounds are not supported. */
49644
49645 static machine_mode
49646 ix86_mpx_bound_mode ()
49647 {
49648 /* Do not support pointer checker if MPX
49649 is not enabled. */
49650 if (!TARGET_MPX)
49651 {
49652 if (flag_check_pointer_bounds)
49653 warning (0, "Pointer Checker requires MPX support on this target."
49654 " Use -mmpx options to enable MPX.");
49655 return VOIDmode;
49656 }
49657
49658 return BNDmode;
49659 }
49660
49661 /* Return constant used to statically initialize constant bounds.
49662
49663 This function is used to create special bound values. For now
49664 only INIT bounds and NONE bounds are expected. More special
49665 values may be added later. */
49666
49667 static tree
49668 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
49669 {
49670 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
49671 : build_zero_cst (pointer_sized_int_node);
49672 tree high = ub ? build_zero_cst (pointer_sized_int_node)
49673 : build_minus_one_cst (pointer_sized_int_node);
49674
49675 /* This function is supposed to be used to create INIT and
49676 NONE bounds only. */
49677 gcc_assert ((lb == 0 && ub == -1)
49678 || (lb == -1 && ub == 0));
49679
49680 return build_complex (NULL, low, high);
49681 }
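/* Concrete values (editorial addition): the only two supported calls are
     ix86_make_bounds_constant (0, -1)    INIT bounds:  low = 0,  high = 0
     ix86_make_bounds_constant (-1, 0)    NONE bounds:  low = ~0, high = ~0
   i.e. the upper bound is stored in one's-complement form, matching the
   BIT_NOT_EXPR applied to UB in ix86_initialize_bounds below.  */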
49682
49683 /* Generate a list of statements STMTS to initialize pointer bounds
49684 variable VAR with bounds LB and UB. Return the number of generated
49685 statements. */
49686
49687 static int
49688 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
49689 {
49690 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
49691 tree lhs, modify, var_p;
49692
49693 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
49694 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
49695
49696 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
49697 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
49698 append_to_statement_list (modify, stmts);
49699
49700 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
49701 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
49702 TYPE_SIZE_UNIT (pointer_sized_int_node)));
49703 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
49704 append_to_statement_list (modify, stmts);
49705
49706 return 2;
49707 }
49708
49709 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
49710 /* For i386, a common symbol is local only for non-PIE binaries. For
49711 x86-64, a common symbol is local only for non-PIE binaries or if the
49712 linker supports copy relocations in PIE binaries. */
49713
49714 static bool
49715 ix86_binds_local_p (const_tree exp)
49716 {
49717 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
49718 (!flag_pic
49719 || (TARGET_64BIT
49720 && HAVE_LD_PIE_COPYRELOC != 0)));
49721 }
49722 #endif
49723
49724 /* If MEM is in the form of [base+offset], extract the two parts
49725 of the address into BASE and OFFSET, otherwise return false. */
49726
49727 static bool
49728 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
49729 {
49730 rtx addr;
49731
49732 gcc_assert (MEM_P (mem));
49733
49734 addr = XEXP (mem, 0);
49735
49736 if (GET_CODE (addr) == CONST)
49737 addr = XEXP (addr, 0);
49738
49739 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
49740 {
49741 *base = addr;
49742 *offset = const0_rtx;
49743 return true;
49744 }
49745
49746 if (GET_CODE (addr) == PLUS
49747 && (REG_P (XEXP (addr, 0))
49748 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
49749 && CONST_INT_P (XEXP (addr, 1)))
49750 {
49751 *base = XEXP (addr, 0);
49752 *offset = XEXP (addr, 1);
49753 return true;
49754 }
49755
49756 return false;
49757 }
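/* Example address shapes (editorial addition): the function accepts
     (mem:DI (reg:DI bx))                           base = bx, offset = 0
     (mem:DI (plus:DI (reg:DI bx) (const_int 8)))   base = bx, offset = 8
   and rejects anything with an index register or scale, e.g.
   (plus (mult (reg) (const_int 4)) (reg)).  */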
49758
49759 /* Given OPERANDS of consecutive load/store, check if we can merge
49760 them into move multiple. LOAD is true if they are load instructions.
49761 MODE is the mode of memory operands. */
49762
49763 bool
49764 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
49765 machine_mode mode)
49766 {
49767 HOST_WIDE_INT offval_1, offval_2, msize;
49768 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
49769
49770 if (load)
49771 {
49772 mem_1 = operands[1];
49773 mem_2 = operands[3];
49774 reg_1 = operands[0];
49775 reg_2 = operands[2];
49776 }
49777 else
49778 {
49779 mem_1 = operands[0];
49780 mem_2 = operands[2];
49781 reg_1 = operands[1];
49782 reg_2 = operands[3];
49783 }
49784
49785 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
49786
49787 if (REGNO (reg_1) != REGNO (reg_2))
49788 return false;
49789
49790 /* Check if the addresses are in the form of [base+offset]. */
49791 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
49792 return false;
49793 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
49794 return false;
49795
49796 /* Check if the bases are the same. */
49797 if (!rtx_equal_p (base_1, base_2))
49798 return false;
49799
49800 offval_1 = INTVAL (offset_1);
49801 offval_2 = INTVAL (offset_2);
49802 msize = GET_MODE_SIZE (mode);
49803 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
49804 if (offval_1 + msize != offval_2)
49805 return false;
49806
49807 return true;
49808 }
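/* Illustrative accept/reject cases (editorial addition, with assumed
   operands): for DImode (msize == 8), a pair of loads of the same register
   from [base + 0] and [base + 8] passes every check above, while offsets
   0 and 16, different base registers, or the second access at the lower
   address all make the function return false.  */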
49809
49810 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
49811
49812 static bool
49813 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
49814 optimization_type opt_type)
49815 {
49816 switch (op)
49817 {
49818 case asin_optab:
49819 case acos_optab:
49820 case log1p_optab:
49821 case exp_optab:
49822 case exp10_optab:
49823 case exp2_optab:
49824 case expm1_optab:
49825 case ldexp_optab:
49826 case scalb_optab:
49827 case round_optab:
49828 return opt_type == OPTIMIZE_FOR_SPEED;
49829
49830 case rint_optab:
49831 if (SSE_FLOAT_MODE_P (mode1)
49832 && TARGET_SSE_MATH
49833 && !flag_trapping_math
49834 && !TARGET_SSE4_1)
49835 return opt_type == OPTIMIZE_FOR_SPEED;
49836 return true;
49837
49838 case floor_optab:
49839 case ceil_optab:
49840 case btrunc_optab:
49841 if (SSE_FLOAT_MODE_P (mode1)
49842 && TARGET_SSE_MATH
49843 && !flag_trapping_math
49844 && TARGET_SSE4_1)
49845 return true;
49846 return opt_type == OPTIMIZE_FOR_SPEED;
49847
49848 case rsqrt_optab:
49849 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
49850
49851 default:
49852 return true;
49853 }
49854 }
49855
49856 /* Address space support.
49857
49858 This is not "far pointers" in the 16-bit sense, but an easy way
49859 to use %fs and %gs segment prefixes. Therefore:
49860
49861 (a) All address spaces have the same modes,
49862 (b) All address spaces have the same address forms,
49863 (c) While %fs and %gs are technically subsets of the generic
49864 address space, they are probably not subsets of each other.
49865 (d) Since we have no access to the segment base register values
49866 without resorting to a system call, we cannot convert a
49867 non-default address space to a default address space.
49868 Therefore we do not claim %fs or %gs are subsets of generic.
49869
49870 Therefore we can (mostly) use the default hooks. */
49871
49872 /* All use of segmentation is assumed to make address 0 valid. */
49873
49874 static bool
49875 ix86_addr_space_zero_address_valid (addr_space_t as)
49876 {
49877 return as != ADDR_SPACE_GENERIC;
49878 }
49879
49880 static void
49881 ix86_init_libfuncs (void)
49882 {
49883 if (TARGET_64BIT)
49884 {
49885 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
49886 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
49887 }
49888 else
49889 {
49890 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
49891 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
49892 }
49893
49894 #if TARGET_MACHO
49895 darwin_rename_builtins ();
49896 #endif
49897 }
49898
49899 /* Generate a call to __divmoddi4 or one of its variants. */
49900
49901 static void
49902 ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
49903 rtx op0, rtx op1,
49904 rtx *quot_p, rtx *rem_p)
49905 {
49906 rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
49907
49908 rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
49909 mode,
49910 op0, GET_MODE (op0),
49911 op1, GET_MODE (op1),
49912 XEXP (rem, 0), Pmode);
49913 *quot_p = quot;
49914 *rem_p = rem;
49915 }
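/* Call shape (editorial addition, hedged sketch): for 32-bit DImode
   division this emits a call compatible with the libgcc helper

     long long __divmoddi4 (long long a, long long b, long long *rem);

   so the quotient comes back as the call's value while the remainder is
   written through the last argument into the stack slot allocated above.  */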
49916
49917 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
49918 FPU, assume that the fpcw is set to extended precision; when using
49919 only SSE, rounding is correct; when using both SSE and the FPU,
49920 the rounding precision is indeterminate, since either may be chosen
49921 apparently at random. */
49922
49923 static enum flt_eval_method
49924 ix86_excess_precision (enum excess_precision_type type)
49925 {
49926 switch (type)
49927 {
49928 case EXCESS_PRECISION_TYPE_FAST:
49929 /* The fastest type to promote to will always be the native type,
49930 whether that occurs with implicit excess precision or
49931 otherwise. */
49932 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49933 case EXCESS_PRECISION_TYPE_STANDARD:
49934 case EXCESS_PRECISION_TYPE_IMPLICIT:
49935 /* Otherwise, the excess precision we want when we are
49936 in a standards compliant mode, and the implicit precision we
49937 provide would be identical were it not for the unpredictable
49938 cases. */
49939 if (!TARGET_80387)
49940 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49941 else if (!TARGET_MIX_SSE_I387)
49942 {
49943 if (!TARGET_SSE_MATH)
49944 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
49945 else if (TARGET_SSE2)
49946 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49947 }
49948
49949 /* If we are in standards compliant mode, but we know we will
49950 calculate in unpredictable precision, return
49951 FLT_EVAL_METHOD_PROMOTE_TO_FLOAT. There is no reason to introduce explicit
49952 excess precision if the target can't guarantee it will honor
49953 it. */
49954 return (type == EXCESS_PRECISION_TYPE_STANDARD
49955 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
49956 : FLT_EVAL_METHOD_UNPREDICTABLE);
49957 default:
49958 gcc_unreachable ();
49959 }
49960
49961 return FLT_EVAL_METHOD_UNPREDICTABLE;
49962 }
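/* Illustrative mapping (editorial addition summarizing the logic above, for
   the standard/implicit cases):
     -mfpmath=387 (x87 only)    -> FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE
     -mfpmath=sse with SSE2     -> FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
     -mfpmath=both              -> FLT_EVAL_METHOD_UNPREDICTABLE, or
                                   PROMOTE_TO_FLOAT under
                                   -fexcess-precision=standard.  */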
49963
49964 /* Target-specific selftests. */
49965
49966 #if CHECKING_P
49967
49968 namespace selftest {
49969
49970 /* Verify that hard regs are dumped as expected (in compact mode). */
49971
49972 static void
49973 ix86_test_dumping_hard_regs ()
49974 {
49975 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
49976 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
49977 }
49978
49979 /* Test dumping an insn with repeated references to the same SCRATCH,
49980 to verify the rtx_reuse code. */
49981
49982 static void
49983 ix86_test_dumping_memory_blockage ()
49984 {
49985 set_new_first_and_last_insn (NULL, NULL);
49986
49987 rtx pat = gen_memory_blockage ();
49988 rtx_reuse_manager r;
49989 r.preprocess (pat);
49990
49991 /* Verify that the repeated references to the SCRATCH are dumped
49992 using reuse IDs. The first should be prefixed with a reuse ID,
49993 and the second should be dumped as a "reuse_rtx" of that ID.
49994 The expected string assumes Pmode == DImode. */
49995 if (Pmode == DImode)
49996 ASSERT_RTL_DUMP_EQ_WITH_REUSE
49997 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
49998 " (unspec:BLK [\n"
49999 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
50000 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
50001 }
50002
50003 /* Verify loading an RTL dump; specifically a dump of copying
50004 a param on x86_64 from a hard reg into the frame.
50005 This test is target-specific since the dump contains target-specific
50006 hard reg names. */
50007
50008 static void
50009 ix86_test_loading_dump_fragment_1 ()
50010 {
50011 rtl_dump_test t (SELFTEST_LOCATION,
50012 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
50013
50014 rtx_insn *insn = get_insn_by_uid (1);
50015
50016 /* The block structure and indentation here are purely for
50017 readability; they mirror the structure of the rtx. */
50018 tree mem_expr;
50019 {
50020 rtx pat = PATTERN (insn);
50021 ASSERT_EQ (SET, GET_CODE (pat));
50022 {
50023 rtx dest = SET_DEST (pat);
50024 ASSERT_EQ (MEM, GET_CODE (dest));
50025 /* Verify the "/c" was parsed. */
50026 ASSERT_TRUE (RTX_FLAG (dest, call));
50027 ASSERT_EQ (SImode, GET_MODE (dest));
50028 {
50029 rtx addr = XEXP (dest, 0);
50030 ASSERT_EQ (PLUS, GET_CODE (addr));
50031 ASSERT_EQ (DImode, GET_MODE (addr));
50032 {
50033 rtx lhs = XEXP (addr, 0);
50034 /* Verify that the "frame" REG was consolidated. */
50035 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
50036 }
50037 {
50038 rtx rhs = XEXP (addr, 1);
50039 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
50040 ASSERT_EQ (-4, INTVAL (rhs));
50041 }
50042 }
50043 /* Verify the "[1 i+0 S4 A32]" was parsed. */
50044 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
50045 /* "i" should have been handled by synthesizing a global int
50046 variable named "i". */
50047 mem_expr = MEM_EXPR (dest);
50048 ASSERT_NE (mem_expr, NULL);
50049 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
50050 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
50051 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
50052 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
50053 /* "+0". */
50054 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
50055 ASSERT_EQ (0, MEM_OFFSET (dest));
50056 /* "S4". */
50057 ASSERT_EQ (4, MEM_SIZE (dest));
50058 /* "A32. */
50059 ASSERT_EQ (32, MEM_ALIGN (dest));
50060 }
50061 {
50062 rtx src = SET_SRC (pat);
50063 ASSERT_EQ (REG, GET_CODE (src));
50064 ASSERT_EQ (SImode, GET_MODE (src));
50065 ASSERT_EQ (5, REGNO (src));
50066 tree reg_expr = REG_EXPR (src);
50067 /* "i" here should point to the same var as for the MEM_EXPR. */
50068 ASSERT_EQ (reg_expr, mem_expr);
50069 }
50070 }
50071 }
50072
50073 /* Verify that the RTL loader copes with a call_insn dump.
50074 This test is target-specific since the dump contains a target-specific
50075 hard reg name. */
50076
50077 static void
50078 ix86_test_loading_call_insn ()
50079 {
50080 /* The test dump includes register "xmm0", which requires TARGET_SSE
50081 to exist. */
50082 if (!TARGET_SSE)
50083 return;
50084
50085 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
50086
50087 rtx_insn *insn = get_insns ();
50088 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
50089
50090 /* "/j". */
50091 ASSERT_TRUE (RTX_FLAG (insn, jump));
50092
50093 rtx pat = PATTERN (insn);
50094 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
50095
50096 /* Verify REG_NOTES. */
50097 {
50098 /* "(expr_list:REG_CALL_DECL". */
50099 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
50100 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
50101 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
50102
50103 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
50104 rtx_expr_list *note1 = note0->next ();
50105 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
50106
50107 ASSERT_EQ (NULL, note1->next ());
50108 }
50109
50110 /* Verify CALL_INSN_FUNCTION_USAGE. */
50111 {
50112 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
50113 rtx_expr_list *usage
50114 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
50115 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
50116 ASSERT_EQ (DFmode, GET_MODE (usage));
50117 ASSERT_EQ (USE, GET_CODE (usage->element ()));
50118 ASSERT_EQ (NULL, usage->next ());
50119 }
50120 }
50121
50122 /* Verify that the RTL loader copes with a dump from print_rtx_function.
50123 This test is target-specific since the dump contains target-specific
50124 hard reg names. */
50125
50126 static void
50127 ix86_test_loading_full_dump ()
50128 {
50129 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
50130
50131 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
50132
50133 rtx_insn *insn_1 = get_insn_by_uid (1);
50134 ASSERT_EQ (NOTE, GET_CODE (insn_1));
50135
50136 rtx_insn *insn_7 = get_insn_by_uid (7);
50137 ASSERT_EQ (INSN, GET_CODE (insn_7));
50138 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
50139
50140 rtx_insn *insn_15 = get_insn_by_uid (15);
50141 ASSERT_EQ (INSN, GET_CODE (insn_15));
50142 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
50143
50144 /* Verify crtl->return_rtx. */
50145 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
50146 ASSERT_EQ (0, REGNO (crtl->return_rtx));
50147 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
50148 }
50149
50150 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
50151 In particular, verify that it correctly loads the 2nd operand.
50152 This test is target-specific since these are machine-specific
50153 operands (and enums). */
50154
50155 static void
50156 ix86_test_loading_unspec ()
50157 {
50158 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
50159
50160 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
50161
50162 ASSERT_TRUE (cfun);
50163
50164 /* Test of an UNSPEC. */
50165 rtx_insn *insn = get_insns ();
50166 ASSERT_EQ (INSN, GET_CODE (insn));
50167 rtx set = single_set (insn);
50168 ASSERT_NE (NULL, set);
50169 rtx dst = SET_DEST (set);
50170 ASSERT_EQ (MEM, GET_CODE (dst));
50171 rtx src = SET_SRC (set);
50172 ASSERT_EQ (UNSPEC, GET_CODE (src));
50173 ASSERT_EQ (BLKmode, GET_MODE (src));
50174 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
50175
50176 rtx v0 = XVECEXP (src, 0, 0);
50177
50178 /* Verify that the two uses of the first SCRATCH have pointer
50179 equality. */
50180 rtx scratch_a = XEXP (dst, 0);
50181 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
50182
50183 rtx scratch_b = XEXP (v0, 0);
50184 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
50185
50186 ASSERT_EQ (scratch_a, scratch_b);
50187
50188 /* Verify that the two mems are thus treated as equal. */
50189 ASSERT_TRUE (rtx_equal_p (dst, v0));
50190
50191 /* Verify that the insn is recognized. */
50192 ASSERT_NE (-1, recog_memoized (insn));
50193
50194 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
50195 insn = NEXT_INSN (insn);
50196 ASSERT_EQ (INSN, GET_CODE (insn));
50197
50198 set = single_set (insn);
50199 ASSERT_NE (NULL, set);
50200
50201 src = SET_SRC (set);
50202 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
50203 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
50204 }
50205
50206 /* Run all target-specific selftests. */
50207
50208 static void
50209 ix86_run_selftests (void)
50210 {
50211 ix86_test_dumping_hard_regs ();
50212 ix86_test_dumping_memory_blockage ();
50213
50214 /* Various tests of loading RTL dumps, here because they contain
50215 ix86-isms (e.g. names of hard regs). */
50216 ix86_test_loading_dump_fragment_1 ();
50217 ix86_test_loading_call_insn ();
50218 ix86_test_loading_full_dump ();
50219 ix86_test_loading_unspec ();
50220 }
50221
50222 } // namespace selftest
50223
50224 #endif /* CHECKING_P */
50225
50226 /* Initialize the GCC target structure. */
50227 #undef TARGET_RETURN_IN_MEMORY
50228 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
50229
50230 #undef TARGET_LEGITIMIZE_ADDRESS
50231 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
50232
50233 #undef TARGET_ATTRIBUTE_TABLE
50234 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
50235 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
50236 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
50237 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
50238 # undef TARGET_MERGE_DECL_ATTRIBUTES
50239 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
50240 #endif
50241
50242 #undef TARGET_COMP_TYPE_ATTRIBUTES
50243 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
50244
50245 #undef TARGET_INIT_BUILTINS
50246 #define TARGET_INIT_BUILTINS ix86_init_builtins
50247 #undef TARGET_BUILTIN_DECL
50248 #define TARGET_BUILTIN_DECL ix86_builtin_decl
50249 #undef TARGET_EXPAND_BUILTIN
50250 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
50251
50252 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
50253 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
50254 ix86_builtin_vectorized_function
50255
50256 #undef TARGET_VECTORIZE_BUILTIN_GATHER
50257 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
50258
50259 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
50260 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
50261
50262 #undef TARGET_BUILTIN_RECIPROCAL
50263 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
50264
50265 #undef TARGET_ASM_FUNCTION_EPILOGUE
50266 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
50267
50268 #undef TARGET_ENCODE_SECTION_INFO
50269 #ifndef SUBTARGET_ENCODE_SECTION_INFO
50270 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
50271 #else
50272 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
50273 #endif
50274
50275 #undef TARGET_ASM_OPEN_PAREN
50276 #define TARGET_ASM_OPEN_PAREN ""
50277 #undef TARGET_ASM_CLOSE_PAREN
50278 #define TARGET_ASM_CLOSE_PAREN ""
50279
50280 #undef TARGET_ASM_BYTE_OP
50281 #define TARGET_ASM_BYTE_OP ASM_BYTE
50282
50283 #undef TARGET_ASM_ALIGNED_HI_OP
50284 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
50285 #undef TARGET_ASM_ALIGNED_SI_OP
50286 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
50287 #ifdef ASM_QUAD
50288 #undef TARGET_ASM_ALIGNED_DI_OP
50289 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
50290 #endif
50291
50292 #undef TARGET_PROFILE_BEFORE_PROLOGUE
50293 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
50294
50295 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
50296 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
50297
50298 #undef TARGET_ASM_UNALIGNED_HI_OP
50299 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
50300 #undef TARGET_ASM_UNALIGNED_SI_OP
50301 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
50302 #undef TARGET_ASM_UNALIGNED_DI_OP
50303 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
50304
50305 #undef TARGET_PRINT_OPERAND
50306 #define TARGET_PRINT_OPERAND ix86_print_operand
50307 #undef TARGET_PRINT_OPERAND_ADDRESS
50308 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
50309 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
50310 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
50311 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
50312 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
50313
50314 #undef TARGET_SCHED_INIT_GLOBAL
50315 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
50316 #undef TARGET_SCHED_ADJUST_COST
50317 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
50318 #undef TARGET_SCHED_ISSUE_RATE
50319 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
50320 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
50321 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
50322 ia32_multipass_dfa_lookahead
50323 #undef TARGET_SCHED_MACRO_FUSION_P
50324 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
50325 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
50326 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
50327
50328 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
50329 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
50330
50331 #undef TARGET_MEMMODEL_CHECK
50332 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
50333
50334 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
50335 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
50336
50337 #ifdef HAVE_AS_TLS
50338 #undef TARGET_HAVE_TLS
50339 #define TARGET_HAVE_TLS true
50340 #endif
50341 #undef TARGET_CANNOT_FORCE_CONST_MEM
50342 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
50343 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
50344 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
50345
50346 #undef TARGET_DELEGITIMIZE_ADDRESS
50347 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
50348
50349 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
50350 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
50351
50352 #undef TARGET_MS_BITFIELD_LAYOUT_P
50353 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
50354
50355 #if TARGET_MACHO
50356 #undef TARGET_BINDS_LOCAL_P
50357 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
50358 #else
50359 #undef TARGET_BINDS_LOCAL_P
50360 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
50361 #endif
50362 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
50363 #undef TARGET_BINDS_LOCAL_P
50364 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
50365 #endif
50366
50367 #undef TARGET_ASM_OUTPUT_MI_THUNK
50368 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
50369 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
50370 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
50371
50372 #undef TARGET_ASM_FILE_START
50373 #define TARGET_ASM_FILE_START x86_file_start
50374
50375 #undef TARGET_OPTION_OVERRIDE
50376 #define TARGET_OPTION_OVERRIDE ix86_option_override
50377
50378 #undef TARGET_REGISTER_MOVE_COST
50379 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
50380 #undef TARGET_MEMORY_MOVE_COST
50381 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
50382 #undef TARGET_RTX_COSTS
50383 #define TARGET_RTX_COSTS ix86_rtx_costs
50384 #undef TARGET_ADDRESS_COST
50385 #define TARGET_ADDRESS_COST ix86_address_cost
50386
50387 #undef TARGET_FLAGS_REGNUM
50388 #define TARGET_FLAGS_REGNUM FLAGS_REG
50389 #undef TARGET_FIXED_CONDITION_CODE_REGS
50390 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
50391 #undef TARGET_CC_MODES_COMPATIBLE
50392 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
50393
50394 #undef TARGET_MACHINE_DEPENDENT_REORG
50395 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
50396
50397 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
50398 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
50399
50400 #undef TARGET_BUILD_BUILTIN_VA_LIST
50401 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
50402
50403 #undef TARGET_FOLD_BUILTIN
50404 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
50405
50406 #undef TARGET_GIMPLE_FOLD_BUILTIN
50407 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
50408
50409 #undef TARGET_COMPARE_VERSION_PRIORITY
50410 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
50411
50412 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
50413 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
50414 ix86_generate_version_dispatcher_body
50415
50416 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
50417 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
50418 ix86_get_function_versions_dispatcher
50419
50420 #undef TARGET_ENUM_VA_LIST_P
50421 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
50422
50423 #undef TARGET_FN_ABI_VA_LIST
50424 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
50425
50426 #undef TARGET_CANONICAL_VA_LIST_TYPE
50427 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
50428
50429 #undef TARGET_EXPAND_BUILTIN_VA_START
50430 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
50431
50432 #undef TARGET_MD_ASM_ADJUST
50433 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
50434
50435 #undef TARGET_C_EXCESS_PRECISION
50436 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
50437 #undef TARGET_PROMOTE_PROTOTYPES
50438 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
50439 #undef TARGET_SETUP_INCOMING_VARARGS
50440 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
50441 #undef TARGET_MUST_PASS_IN_STACK
50442 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
50443 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
50444 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
50445 #undef TARGET_FUNCTION_ARG_ADVANCE
50446 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
50447 #undef TARGET_FUNCTION_ARG
50448 #define TARGET_FUNCTION_ARG ix86_function_arg
50449 #undef TARGET_INIT_PIC_REG
50450 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
50451 #undef TARGET_USE_PSEUDO_PIC_REG
50452 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
50453 #undef TARGET_FUNCTION_ARG_BOUNDARY
50454 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
50455 #undef TARGET_PASS_BY_REFERENCE
50456 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
50457 #undef TARGET_INTERNAL_ARG_POINTER
50458 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
50459 #undef TARGET_UPDATE_STACK_BOUNDARY
50460 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
50461 #undef TARGET_GET_DRAP_RTX
50462 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
50463 #undef TARGET_STRICT_ARGUMENT_NAMING
50464 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
50465 #undef TARGET_STATIC_CHAIN
50466 #define TARGET_STATIC_CHAIN ix86_static_chain
50467 #undef TARGET_TRAMPOLINE_INIT
50468 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
50469 #undef TARGET_RETURN_POPS_ARGS
50470 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
50471
50472 #undef TARGET_WARN_FUNC_RETURN
50473 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
50474
50475 #undef TARGET_LEGITIMATE_COMBINED_INSN
50476 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
50477
50478 #undef TARGET_ASAN_SHADOW_OFFSET
50479 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
50480
50481 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
50482 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
50483
50484 #undef TARGET_SCALAR_MODE_SUPPORTED_P
50485 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
50486
50487 #undef TARGET_VECTOR_MODE_SUPPORTED_P
50488 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
50489
50490 #undef TARGET_C_MODE_FOR_SUFFIX
50491 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
50492
50493 #ifdef HAVE_AS_TLS
50494 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
50495 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
50496 #endif
50497
50498 #ifdef SUBTARGET_INSERT_ATTRIBUTES
50499 #undef TARGET_INSERT_ATTRIBUTES
50500 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
50501 #endif
50502
50503 #undef TARGET_MANGLE_TYPE
50504 #define TARGET_MANGLE_TYPE ix86_mangle_type
50505
50506 #undef TARGET_STACK_PROTECT_GUARD
50507 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
50508
50509 #if !TARGET_MACHO
50510 #undef TARGET_STACK_PROTECT_FAIL
50511 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
50512 #endif
50513
50514 #undef TARGET_FUNCTION_VALUE
50515 #define TARGET_FUNCTION_VALUE ix86_function_value
50516
50517 #undef TARGET_FUNCTION_VALUE_REGNO_P
50518 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
50519
50520 #undef TARGET_PROMOTE_FUNCTION_MODE
50521 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
50522
50523 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
50524 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
50525
50526 #undef TARGET_MEMBER_TYPE_FORCES_BLK
50527 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
50528
50529 #undef TARGET_INSTANTIATE_DECLS
50530 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
50531
50532 #undef TARGET_SECONDARY_RELOAD
50533 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
50534 #undef TARGET_SECONDARY_MEMORY_NEEDED
50535 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
50536 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
50537 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
50538
50539 #undef TARGET_CLASS_MAX_NREGS
50540 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
50541
50542 #undef TARGET_PREFERRED_RELOAD_CLASS
50543 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
50544 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
50545 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
50546 #undef TARGET_CLASS_LIKELY_SPILLED_P
50547 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
50548
50549 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
50550 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
50551 ix86_builtin_vectorization_cost
50552 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
50553 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
50554 ix86_vectorize_vec_perm_const_ok
50555 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
50556 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
50557 ix86_preferred_simd_mode
50558 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
50559 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
50560 ix86_autovectorize_vector_sizes
50561 #undef TARGET_VECTORIZE_GET_MASK_MODE
50562 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
50563 #undef TARGET_VECTORIZE_INIT_COST
50564 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
50565 #undef TARGET_VECTORIZE_ADD_STMT_COST
50566 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
50567 #undef TARGET_VECTORIZE_FINISH_COST
50568 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
50569 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
50570 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
50571
50572 #undef TARGET_SET_CURRENT_FUNCTION
50573 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
50574
50575 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
50576 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
50577
50578 #undef TARGET_OPTION_SAVE
50579 #define TARGET_OPTION_SAVE ix86_function_specific_save
50580
50581 #undef TARGET_OPTION_RESTORE
50582 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
50583
50584 #undef TARGET_OPTION_POST_STREAM_IN
50585 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
50586
50587 #undef TARGET_OPTION_PRINT
50588 #define TARGET_OPTION_PRINT ix86_function_specific_print
50589
50590 #undef TARGET_OPTION_FUNCTION_VERSIONS
50591 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
50592
50593 #undef TARGET_CAN_INLINE_P
50594 #define TARGET_CAN_INLINE_P ix86_can_inline_p
50595
50596 #undef TARGET_LEGITIMATE_ADDRESS_P
50597 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
50598
50599 #undef TARGET_REGISTER_PRIORITY
50600 #define TARGET_REGISTER_PRIORITY ix86_register_priority
50601
50602 #undef TARGET_REGISTER_USAGE_LEVELING_P
50603 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
50604
50605 #undef TARGET_LEGITIMATE_CONSTANT_P
50606 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
50607
50608 #undef TARGET_COMPUTE_FRAME_LAYOUT
50609 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
50610
50611 #undef TARGET_FRAME_POINTER_REQUIRED
50612 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
50613
50614 #undef TARGET_CAN_ELIMINATE
50615 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
50616
50617 #undef TARGET_EXTRA_LIVE_ON_ENTRY
50618 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
50619
50620 #undef TARGET_ASM_CODE_END
50621 #define TARGET_ASM_CODE_END ix86_code_end
50622
50623 #undef TARGET_CONDITIONAL_REGISTER_USAGE
50624 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
50625
50626 #undef TARGET_CANONICALIZE_COMPARISON
50627 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
50628
50629 #undef TARGET_LOOP_UNROLL_ADJUST
50630 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
50631
50632 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
50633 #undef TARGET_SPILL_CLASS
50634 #define TARGET_SPILL_CLASS ix86_spill_class
50635
50636 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
50637 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
50638 ix86_simd_clone_compute_vecsize_and_simdlen
50639
50640 #undef TARGET_SIMD_CLONE_ADJUST
50641 #define TARGET_SIMD_CLONE_ADJUST \
50642 ix86_simd_clone_adjust
50643
50644 #undef TARGET_SIMD_CLONE_USABLE
50645 #define TARGET_SIMD_CLONE_USABLE \
50646 ix86_simd_clone_usable
50647
50648 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
50649 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
50650 ix86_float_exceptions_rounding_supported_p
50651
50652 #undef TARGET_MODE_EMIT
50653 #define TARGET_MODE_EMIT ix86_emit_mode_set
50654
50655 #undef TARGET_MODE_NEEDED
50656 #define TARGET_MODE_NEEDED ix86_mode_needed
50657
50658 #undef TARGET_MODE_AFTER
50659 #define TARGET_MODE_AFTER ix86_mode_after
50660
50661 #undef TARGET_MODE_ENTRY
50662 #define TARGET_MODE_ENTRY ix86_mode_entry
50663
50664 #undef TARGET_MODE_EXIT
50665 #define TARGET_MODE_EXIT ix86_mode_exit
50666
50667 #undef TARGET_MODE_PRIORITY
50668 #define TARGET_MODE_PRIORITY ix86_mode_priority
50669
50670 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
50671 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
50672
50673 #undef TARGET_LOAD_BOUNDS_FOR_ARG
50674 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
50675
50676 #undef TARGET_STORE_BOUNDS_FOR_ARG
50677 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
50678
50679 #undef TARGET_LOAD_RETURNED_BOUNDS
50680 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
50681
50682 #undef TARGET_STORE_RETURNED_BOUNDS
50683 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
50684
50685 #undef TARGET_CHKP_BOUND_MODE
50686 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
50687
50688 #undef TARGET_BUILTIN_CHKP_FUNCTION
50689 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
50690
50691 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
50692 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
50693
50694 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
50695 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
50696
50697 #undef TARGET_CHKP_INITIALIZE_BOUNDS
50698 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
50699
50700 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
50701 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
50702
50703 #undef TARGET_OFFLOAD_OPTIONS
50704 #define TARGET_OFFLOAD_OPTIONS \
50705 ix86_offload_options
50706
50707 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
50708 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
50709
50710 #undef TARGET_OPTAB_SUPPORTED_P
50711 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
50712
50713 #undef TARGET_HARD_REGNO_SCRATCH_OK
50714 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
50715
50716 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
50717 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
50718
50719 #undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
50720 #define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p
50721
50722 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
50723 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
50724
50725 #undef TARGET_INIT_LIBFUNCS
50726 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
50727
50728 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
50729 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
50730
50731 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
50732 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
50733
50734 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
50735 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
50736
50737 #undef TARGET_HARD_REGNO_NREGS
50738 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
50739 #undef TARGET_HARD_REGNO_MODE_OK
50740 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
50741
50742 #undef TARGET_MODES_TIEABLE_P
50743 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
50744
50745 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
50746 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
50747 ix86_hard_regno_call_part_clobbered
50748
50749 #undef TARGET_CAN_CHANGE_MODE_CLASS
50750 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
50751
50752 #undef TARGET_STATIC_RTX_ALIGNMENT
50753 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
50754 #undef TARGET_CONSTANT_ALIGNMENT
50755 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
50756
50757 #undef TARGET_EMPTY_RECORD_P
50758 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
50759
50760 #undef TARGET_WARN_PARAMETER_PASSING_ABI
50761 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
50762
50763 #if CHECKING_P
50764 #undef TARGET_RUN_TARGET_SELFTESTS
50765 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
50766 #endif /* #if CHECKING_P */
50767
50768 struct gcc_target targetm = TARGET_INITIALIZER;
50769 \f
50770 #include "gt-i386.h"