1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfgloop.h"
29 #include "df.h"
30 #include "tm_p.h"
31 #include "stringpool.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "cgraph.h"
38 #include "diagnostic.h"
39 #include "cfgbuild.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "attribs.h"
43 #include "calls.h"
44 #include "stor-layout.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "except.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "gimplify.h"
57 #include "dwarf2.h"
58 #include "tm-constrs.h"
59 #include "params.h"
60 #include "cselib.h"
61 #include "sched-int.h"
62 #include "opts.h"
63 #include "tree-pass.h"
64 #include "context.h"
65 #include "pass_manager.h"
66 #include "target-globals.h"
67 #include "gimple-iterator.h"
68 #include "tree-vectorizer.h"
69 #include "shrink-wrap.h"
70 #include "builtins.h"
71 #include "rtl-iter.h"
72 #include "tree-iterator.h"
73 #include "tree-chkp.h"
74 #include "rtl-chkp.h"
75 #include "dbgcnt.h"
76 #include "case-cfn-macros.h"
77 #include "regrename.h"
78
79 /* This file should be included last. */
80 #include "target-def.h"
81
82 static rtx legitimize_dllimport_symbol (rtx, bool);
83 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
84 static rtx legitimize_pe_coff_symbol (rtx, bool);
85 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
86
87 #ifndef CHECK_STACK_LIMIT
88 #define CHECK_STACK_LIMIT (-1)
89 #endif
90
91 /* Return index of given mode in mult and division cost tables. */
92 #define MODE_INDEX(mode) \
93 ((mode) == QImode ? 0 \
94 : (mode) == HImode ? 1 \
95 : (mode) == SImode ? 2 \
96 : (mode) == DImode ? 3 \
97 : 4)
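/* Editorial sketch (not part of the original file): MODE_INDEX picks the
   per-mode slot out of the five-entry multiply and divide cost arrays in
   struct processor_costs.  Assuming the field names mult_init and divide
   match the tables below, a lookup looks roughly like:

     int mul_start = ix86_cost->mult_init[MODE_INDEX (mode)];
     int div_cost  = ix86_cost->divide[MODE_INDEX (mode)];

   QImode..DImode map to slots 0..3; any other mode falls into the
   "other" slot 4.  */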
98
99 /* Processor costs (relative to an add) */
100 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
101 #define COSTS_N_BYTES(N) ((N) * 2)
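/* Editorial example: given the file's own assumption above (COSTS_N_INSNS
   being (N) * 4 and an add being 2 bytes), a plain add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the byte-based size
   table below stays on roughly the same scale as the speed-tuned tables.  */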
102
103 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
104
105 static stringop_algs ix86_size_memcpy[2] = {
106 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
107 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
108 static stringop_algs ix86_size_memset[2] = {
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
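/* Editorial sketch of how these tables are read (field layout assumed from
   the stringop_algs definition in i386.h, not restated in this file): each
   value names the algorithm for blocks of unknown size, followed by
   {max_size, algorithm, noalign} triples for known sizes, where -1 means
   "no upper bound".  The two-element arrays hold one strategy for 32-bit
   and one for 64-bit code, selected roughly as:

     const struct stringop_algs *algs = &ix86_cost->memcpy[TARGET_64BIT];

   So ix86_size_memcpy and ix86_size_memset above simply say: when tuning
   for size, always expand with rep movsb / rep stosb.  */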
111
112 const
113 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
114 COSTS_N_BYTES (2), /* cost of an add instruction */
115 COSTS_N_BYTES (3), /* cost of a lea instruction */
116 COSTS_N_BYTES (2), /* variable shift costs */
117 COSTS_N_BYTES (3), /* constant shift costs */
118 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
119 COSTS_N_BYTES (3), /* HI */
120 COSTS_N_BYTES (3), /* SI */
121 COSTS_N_BYTES (3), /* DI */
122 COSTS_N_BYTES (5)}, /* other */
123 0, /* cost of multiply per each bit set */
124 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 COSTS_N_BYTES (3), /* cost of movsx */
130 COSTS_N_BYTES (3), /* cost of movzx */
131 0, /* "large" insn */
132 2, /* MOVE_RATIO */
133 2, /* cost for loading QImode using movzbl */
134 {2, 2, 2}, /* cost of loading integer registers
135 in QImode, HImode and SImode.
136 Relative to reg-reg move (2). */
137 {2, 2, 2}, /* cost of storing integer registers */
138 2, /* cost of reg,reg fld/fst */
139 {2, 2, 2}, /* cost of loading fp registers
140 in SFmode, DFmode and XFmode */
141 {2, 2, 2}, /* cost of storing fp registers
142 in SFmode, DFmode and XFmode */
143 3, /* cost of moving MMX register */
144 {3, 3}, /* cost of loading MMX registers
145 in SImode and DImode */
146 {3, 3}, /* cost of storing MMX registers
147 in SImode and DImode */
148 3, /* cost of moving SSE register */
149 {3, 3, 3}, /* cost of loading SSE registers
150 in SImode, DImode and TImode */
151 {3, 3, 3}, /* cost of storing SSE registers
152 in SImode, DImode and TImode */
153 3, /* MMX or SSE register to integer */
154 0, /* size of l1 cache */
155 0, /* size of l2 cache */
156 0, /* size of prefetch block */
157 0, /* number of parallel prefetches */
158 2, /* Branch cost */
159 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
160 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
161 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
162 COSTS_N_BYTES (2), /* cost of FABS instruction. */
163 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
164 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
165 ix86_size_memcpy,
166 ix86_size_memset,
167 1, /* scalar_stmt_cost. */
168 1, /* scalar load_cost. */
169 1, /* scalar_store_cost. */
170 1, /* vec_stmt_cost. */
171 1, /* vec_to_scalar_cost. */
172 1, /* scalar_to_vec_cost. */
173 1, /* vec_align_load_cost. */
174 1, /* vec_unalign_load_cost. */
175 1, /* vec_store_cost. */
176 1, /* cond_taken_branch_cost. */
177 1, /* cond_not_taken_branch_cost. */
178 };
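/* Editorial note (an assumption about usage, not stated in this excerpt):
   ix86_size_cost is the table the backend switches to when optimizing for
   size, along the lines of:

     if (optimize_size)
       ix86_cost = &ix86_size_cost;
     else
       ix86_cost = ix86_tune_cost;

   while the per-CPU tables that follow are selected by -mtune.  */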
179
180 /* Processor costs (relative to an add) */
181 static stringop_algs i386_memcpy[2] = {
182 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
183 DUMMY_STRINGOP_ALGS};
184 static stringop_algs i386_memset[2] = {
185 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
186 DUMMY_STRINGOP_ALGS};
187
188 static const
189 struct processor_costs i386_cost = { /* 386 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (6), /* HI */
196 COSTS_N_INSNS (6), /* SI */
197 COSTS_N_INSNS (6), /* DI */
198 COSTS_N_INSNS (6)}, /* other */
199 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (23), /* HI */
202 COSTS_N_INSNS (23), /* SI */
203 COSTS_N_INSNS (23), /* DI */
204 COSTS_N_INSNS (23)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of l1 cache */
231 0, /* size of l2 cache */
232 0, /* size of prefetch block */
233 0, /* number of parallel prefetches */
234 1, /* Branch cost */
235 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
236 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
237 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
238 COSTS_N_INSNS (22), /* cost of FABS instruction. */
239 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
240 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
241 i386_memcpy,
242 i386_memset,
243 1, /* scalar_stmt_cost. */
244 1, /* scalar load_cost. */
245 1, /* scalar_store_cost. */
246 1, /* vec_stmt_cost. */
247 1, /* vec_to_scalar_cost. */
248 1, /* scalar_to_vec_cost. */
249 1, /* vec_align_load_cost. */
250 2, /* vec_unalign_load_cost. */
251 1, /* vec_store_cost. */
252 3, /* cond_taken_branch_cost. */
253 1, /* cond_not_taken_branch_cost. */
254 };
255
256 static stringop_algs i486_memcpy[2] = {
257 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
258 DUMMY_STRINGOP_ALGS};
259 static stringop_algs i486_memset[2] = {
260 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
261 DUMMY_STRINGOP_ALGS};
262
263 static const
264 struct processor_costs i486_cost = { /* 486 specific costs */
265 COSTS_N_INSNS (1), /* cost of an add instruction */
266 COSTS_N_INSNS (1), /* cost of a lea instruction */
267 COSTS_N_INSNS (3), /* variable shift costs */
268 COSTS_N_INSNS (2), /* constant shift costs */
269 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
270 COSTS_N_INSNS (12), /* HI */
271 COSTS_N_INSNS (12), /* SI */
272 COSTS_N_INSNS (12), /* DI */
273 COSTS_N_INSNS (12)}, /* other */
274 1, /* cost of multiply per each bit set */
275 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
276 COSTS_N_INSNS (40), /* HI */
277 COSTS_N_INSNS (40), /* SI */
278 COSTS_N_INSNS (40), /* DI */
279 COSTS_N_INSNS (40)}, /* other */
280 COSTS_N_INSNS (3), /* cost of movsx */
281 COSTS_N_INSNS (2), /* cost of movzx */
282 15, /* "large" insn */
283 3, /* MOVE_RATIO */
284 4, /* cost for loading QImode using movzbl */
285 {2, 4, 2}, /* cost of loading integer registers
286 in QImode, HImode and SImode.
287 Relative to reg-reg move (2). */
288 {2, 4, 2}, /* cost of storing integer registers */
289 2, /* cost of reg,reg fld/fst */
290 {8, 8, 8}, /* cost of loading fp registers
291 in SFmode, DFmode and XFmode */
292 {8, 8, 8}, /* cost of storing fp registers
293 in SFmode, DFmode and XFmode */
294 2, /* cost of moving MMX register */
295 {4, 8}, /* cost of loading MMX registers
296 in SImode and DImode */
297 {4, 8}, /* cost of storing MMX registers
298 in SImode and DImode */
299 2, /* cost of moving SSE register */
300 {4, 8, 16}, /* cost of loading SSE registers
301 in SImode, DImode and TImode */
302 {4, 8, 16}, /* cost of storing SSE registers
303 in SImode, DImode and TImode */
304 3, /* MMX or SSE register to integer */
305 4, /* size of l1 cache. 486 has 8kB cache
306 shared for code and data, so 4kB is
307 not really precise. */
308 4, /* size of l2 cache */
309 0, /* size of prefetch block */
310 0, /* number of parallel prefetches */
311 1, /* Branch cost */
312 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
313 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
314 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
315 COSTS_N_INSNS (3), /* cost of FABS instruction. */
316 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
317 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
318 i486_memcpy,
319 i486_memset,
320 1, /* scalar_stmt_cost. */
321 1, /* scalar load_cost. */
322 1, /* scalar_store_cost. */
323 1, /* vec_stmt_cost. */
324 1, /* vec_to_scalar_cost. */
325 1, /* scalar_to_vec_cost. */
326 1, /* vec_align_load_cost. */
327 2, /* vec_unalign_load_cost. */
328 1, /* vec_store_cost. */
329 3, /* cond_taken_branch_cost. */
330 1, /* cond_not_taken_branch_cost. */
331 };
332
333 static stringop_algs pentium_memcpy[2] = {
334 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
335 DUMMY_STRINGOP_ALGS};
336 static stringop_algs pentium_memset[2] = {
337 {libcall, {{-1, rep_prefix_4_byte, false}}},
338 DUMMY_STRINGOP_ALGS};
339
340 static const
341 struct processor_costs pentium_cost = {
342 COSTS_N_INSNS (1), /* cost of an add instruction */
343 COSTS_N_INSNS (1), /* cost of a lea instruction */
344 COSTS_N_INSNS (4), /* variable shift costs */
345 COSTS_N_INSNS (1), /* constant shift costs */
346 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
347 COSTS_N_INSNS (11), /* HI */
348 COSTS_N_INSNS (11), /* SI */
349 COSTS_N_INSNS (11), /* DI */
350 COSTS_N_INSNS (11)}, /* other */
351 0, /* cost of multiply per each bit set */
352 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
353 COSTS_N_INSNS (25), /* HI */
354 COSTS_N_INSNS (25), /* SI */
355 COSTS_N_INSNS (25), /* DI */
356 COSTS_N_INSNS (25)}, /* other */
357 COSTS_N_INSNS (3), /* cost of movsx */
358 COSTS_N_INSNS (2), /* cost of movzx */
359 8, /* "large" insn */
360 6, /* MOVE_RATIO */
361 6, /* cost for loading QImode using movzbl */
362 {2, 4, 2}, /* cost of loading integer registers
363 in QImode, HImode and SImode.
364 Relative to reg-reg move (2). */
365 {2, 4, 2}, /* cost of storing integer registers */
366 2, /* cost of reg,reg fld/fst */
367 {2, 2, 6}, /* cost of loading fp registers
368 in SFmode, DFmode and XFmode */
369 {4, 4, 6}, /* cost of storing fp registers
370 in SFmode, DFmode and XFmode */
371 8, /* cost of moving MMX register */
372 {8, 8}, /* cost of loading MMX registers
373 in SImode and DImode */
374 {8, 8}, /* cost of storing MMX registers
375 in SImode and DImode */
376 2, /* cost of moving SSE register */
377 {4, 8, 16}, /* cost of loading SSE registers
378 in SImode, DImode and TImode */
379 {4, 8, 16}, /* cost of storing SSE registers
380 in SImode, DImode and TImode */
381 3, /* MMX or SSE register to integer */
382 8, /* size of l1 cache. */
383 8, /* size of l2 cache */
384 0, /* size of prefetch block */
385 0, /* number of parallel prefetches */
386 2, /* Branch cost */
387 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
388 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
389 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
390 COSTS_N_INSNS (1), /* cost of FABS instruction. */
391 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
392 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
393 pentium_memcpy,
394 pentium_memset,
395 1, /* scalar_stmt_cost. */
396 1, /* scalar load_cost. */
397 1, /* scalar_store_cost. */
398 1, /* vec_stmt_cost. */
399 1, /* vec_to_scalar_cost. */
400 1, /* scalar_to_vec_cost. */
401 1, /* vec_align_load_cost. */
402 2, /* vec_unalign_load_cost. */
403 1, /* vec_store_cost. */
404 3, /* cond_taken_branch_cost. */
405 1, /* cond_not_taken_branch_cost. */
406 };
407
408 static const
409 struct processor_costs lakemont_cost = {
410 COSTS_N_INSNS (1), /* cost of an add instruction */
411 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
412 COSTS_N_INSNS (1), /* variable shift costs */
413 COSTS_N_INSNS (1), /* constant shift costs */
414 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
415 COSTS_N_INSNS (11), /* HI */
416 COSTS_N_INSNS (11), /* SI */
417 COSTS_N_INSNS (11), /* DI */
418 COSTS_N_INSNS (11)}, /* other */
419 0, /* cost of multiply per each bit set */
420 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
421 COSTS_N_INSNS (25), /* HI */
422 COSTS_N_INSNS (25), /* SI */
423 COSTS_N_INSNS (25), /* DI */
424 COSTS_N_INSNS (25)}, /* other */
425 COSTS_N_INSNS (3), /* cost of movsx */
426 COSTS_N_INSNS (2), /* cost of movzx */
427 8, /* "large" insn */
428 9, /* MOVE_RATIO */
429 6, /* cost for loading QImode using movzbl */
430 {2, 4, 2}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 4, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers
438 in SFmode, DFmode and XFmode */
439 8, /* cost of moving MMX register */
440 {8, 8}, /* cost of loading MMX registers
441 in SImode and DImode */
442 {8, 8}, /* cost of storing MMX registers
443 in SImode and DImode */
444 2, /* cost of moving SSE register */
445 {4, 8, 16}, /* cost of loading SSE registers
446 in SImode, DImode and TImode */
447 {4, 8, 16}, /* cost of storing SSE registers
448 in SImode, DImode and TImode */
449 3, /* MMX or SSE register to integer */
450 8, /* size of l1 cache. */
451 8, /* size of l2 cache */
452 0, /* size of prefetch block */
453 0, /* number of parallel prefetches */
454 2, /* Branch cost */
455 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
456 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
457 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
458 COSTS_N_INSNS (1), /* cost of FABS instruction. */
459 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
460 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
461 pentium_memcpy,
462 pentium_memset,
463 1, /* scalar_stmt_cost. */
464 1, /* scalar load_cost. */
465 1, /* scalar_store_cost. */
466 1, /* vec_stmt_cost. */
467 1, /* vec_to_scalar_cost. */
468 1, /* scalar_to_vec_cost. */
469 1, /* vec_align_load_cost. */
470 2, /* vec_unalign_load_cost. */
471 1, /* vec_store_cost. */
472 3, /* cond_taken_branch_cost. */
473 1, /* cond_not_taken_branch_cost. */
474 };
475
476 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
477    (we ensure the alignment).  For small blocks an inline loop is still a
478    noticeable win; for bigger blocks either rep movsl or rep movsb is the
479    way to go.  Rep movsb apparently has a more expensive startup time in the
480    CPU, but after 4K the difference is down in the noise.  */
481 static stringop_algs pentiumpro_memcpy[2] = {
482 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
483 {8192, rep_prefix_4_byte, false},
484 {-1, rep_prefix_1_byte, false}}},
485 DUMMY_STRINGOP_ALGS};
486 static stringop_algs pentiumpro_memset[2] = {
487 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
488 {8192, rep_prefix_4_byte, false},
489 {-1, libcall, false}}},
490 DUMMY_STRINGOP_ALGS};
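/* Editorial example (not in the original): reading pentiumpro_memcpy above
   for 32-bit code, a known-size copy uses an inline loop up to 128 bytes,
   an unrolled loop up to 1024 bytes, rep movsl up to 8192 bytes and
   rep movsb beyond that; when the size is unknown at compile time it falls
   back to rep movsl.  */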
491 static const
492 struct processor_costs pentiumpro_cost = {
493 COSTS_N_INSNS (1), /* cost of an add instruction */
494 COSTS_N_INSNS (1), /* cost of a lea instruction */
495 COSTS_N_INSNS (1), /* variable shift costs */
496 COSTS_N_INSNS (1), /* constant shift costs */
497 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
498 COSTS_N_INSNS (4), /* HI */
499 COSTS_N_INSNS (4), /* SI */
500 COSTS_N_INSNS (4), /* DI */
501 COSTS_N_INSNS (4)}, /* other */
502 0, /* cost of multiply per each bit set */
503 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
504 COSTS_N_INSNS (17), /* HI */
505 COSTS_N_INSNS (17), /* SI */
506 COSTS_N_INSNS (17), /* DI */
507 COSTS_N_INSNS (17)}, /* other */
508 COSTS_N_INSNS (1), /* cost of movsx */
509 COSTS_N_INSNS (1), /* cost of movzx */
510 8, /* "large" insn */
511 6, /* MOVE_RATIO */
512 2, /* cost for loading QImode using movzbl */
513 {4, 4, 4}, /* cost of loading integer registers
514 in QImode, HImode and SImode.
515 Relative to reg-reg move (2). */
516 {2, 2, 2}, /* cost of storing integer registers */
517 2, /* cost of reg,reg fld/fst */
518 {2, 2, 6}, /* cost of loading fp registers
519 in SFmode, DFmode and XFmode */
520 {4, 4, 6}, /* cost of storing fp registers
521 in SFmode, DFmode and XFmode */
522 2, /* cost of moving MMX register */
523 {2, 2}, /* cost of loading MMX registers
524 in SImode and DImode */
525 {2, 2}, /* cost of storing MMX registers
526 in SImode and DImode */
527 2, /* cost of moving SSE register */
528 {2, 2, 8}, /* cost of loading SSE registers
529 in SImode, DImode and TImode */
530 {2, 2, 8}, /* cost of storing SSE registers
531 in SImode, DImode and TImode */
532 3, /* MMX or SSE register to integer */
533 8, /* size of l1 cache. */
534 256, /* size of l2 cache */
535 32, /* size of prefetch block */
536 6, /* number of parallel prefetches */
537 2, /* Branch cost */
538 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
539 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
540 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
541 COSTS_N_INSNS (2), /* cost of FABS instruction. */
542 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
543 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
544 pentiumpro_memcpy,
545 pentiumpro_memset,
546 1, /* scalar_stmt_cost. */
547 1, /* scalar load_cost. */
548 1, /* scalar_store_cost. */
549 1, /* vec_stmt_cost. */
550 1, /* vec_to_scalar_cost. */
551 1, /* scalar_to_vec_cost. */
552 1, /* vec_align_load_cost. */
553 2, /* vec_unalign_load_cost. */
554 1, /* vec_store_cost. */
555 3, /* cond_taken_branch_cost. */
556 1, /* cond_not_taken_branch_cost. */
557 };
558
559 static stringop_algs geode_memcpy[2] = {
560 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
561 DUMMY_STRINGOP_ALGS};
562 static stringop_algs geode_memset[2] = {
563 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
564 DUMMY_STRINGOP_ALGS};
565 static const
566 struct processor_costs geode_cost = {
567 COSTS_N_INSNS (1), /* cost of an add instruction */
568 COSTS_N_INSNS (1), /* cost of a lea instruction */
569 COSTS_N_INSNS (2), /* variable shift costs */
570 COSTS_N_INSNS (1), /* constant shift costs */
571 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
572 COSTS_N_INSNS (4), /* HI */
573 COSTS_N_INSNS (7), /* SI */
574 COSTS_N_INSNS (7), /* DI */
575 COSTS_N_INSNS (7)}, /* other */
576 0, /* cost of multiply per each bit set */
577 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
578 COSTS_N_INSNS (23), /* HI */
579 COSTS_N_INSNS (39), /* SI */
580 COSTS_N_INSNS (39), /* DI */
581 COSTS_N_INSNS (39)}, /* other */
582 COSTS_N_INSNS (1), /* cost of movsx */
583 COSTS_N_INSNS (1), /* cost of movzx */
584 8, /* "large" insn */
585 4, /* MOVE_RATIO */
586 1, /* cost for loading QImode using movzbl */
587 {1, 1, 1}, /* cost of loading integer registers
588 in QImode, HImode and SImode.
589 Relative to reg-reg move (2). */
590 {1, 1, 1}, /* cost of storing integer registers */
591 1, /* cost of reg,reg fld/fst */
592 {1, 1, 1}, /* cost of loading fp registers
593 in SFmode, DFmode and XFmode */
594 {4, 6, 6}, /* cost of storing fp registers
595 in SFmode, DFmode and XFmode */
596
597 1, /* cost of moving MMX register */
598 {1, 1}, /* cost of loading MMX registers
599 in SImode and DImode */
600 {1, 1}, /* cost of storing MMX registers
601 in SImode and DImode */
602 1, /* cost of moving SSE register */
603 {1, 1, 1}, /* cost of loading SSE registers
604 in SImode, DImode and TImode */
605 {1, 1, 1}, /* cost of storing SSE registers
606 in SImode, DImode and TImode */
607 1, /* MMX or SSE register to integer */
608 64, /* size of l1 cache. */
609 128, /* size of l2 cache. */
610 32, /* size of prefetch block */
611 1, /* number of parallel prefetches */
612 1, /* Branch cost */
613 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
614 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
615 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
616 COSTS_N_INSNS (1), /* cost of FABS instruction. */
617 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
618 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
619 geode_memcpy,
620 geode_memset,
621 1, /* scalar_stmt_cost. */
622 1, /* scalar load_cost. */
623 1, /* scalar_store_cost. */
624 1, /* vec_stmt_cost. */
625 1, /* vec_to_scalar_cost. */
626 1, /* scalar_to_vec_cost. */
627 1, /* vec_align_load_cost. */
628 2, /* vec_unalign_load_cost. */
629 1, /* vec_store_cost. */
630 3, /* cond_taken_branch_cost. */
631 1, /* cond_not_taken_branch_cost. */
632 };
633
634 static stringop_algs k6_memcpy[2] = {
635 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
636 DUMMY_STRINGOP_ALGS};
637 static stringop_algs k6_memset[2] = {
638 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
639 DUMMY_STRINGOP_ALGS};
640 static const
641 struct processor_costs k6_cost = {
642 COSTS_N_INSNS (1), /* cost of an add instruction */
643 COSTS_N_INSNS (2), /* cost of a lea instruction */
644 COSTS_N_INSNS (1), /* variable shift costs */
645 COSTS_N_INSNS (1), /* constant shift costs */
646 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
647 COSTS_N_INSNS (3), /* HI */
648 COSTS_N_INSNS (3), /* SI */
649 COSTS_N_INSNS (3), /* DI */
650 COSTS_N_INSNS (3)}, /* other */
651 0, /* cost of multiply per each bit set */
652 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
653 COSTS_N_INSNS (18), /* HI */
654 COSTS_N_INSNS (18), /* SI */
655 COSTS_N_INSNS (18), /* DI */
656 COSTS_N_INSNS (18)}, /* other */
657 COSTS_N_INSNS (2), /* cost of movsx */
658 COSTS_N_INSNS (2), /* cost of movzx */
659 8, /* "large" insn */
660 4, /* MOVE_RATIO */
661 3, /* cost for loading QImode using movzbl */
662 {4, 5, 4}, /* cost of loading integer registers
663 in QImode, HImode and SImode.
664 Relative to reg-reg move (2). */
665 {2, 3, 2}, /* cost of storing integer registers */
666 4, /* cost of reg,reg fld/fst */
667 {6, 6, 6}, /* cost of loading fp registers
668 in SFmode, DFmode and XFmode */
669 {4, 4, 4}, /* cost of storing fp registers
670 in SFmode, DFmode and XFmode */
671 2, /* cost of moving MMX register */
672 {2, 2}, /* cost of loading MMX registers
673 in SImode and DImode */
674 {2, 2}, /* cost of storing MMX registers
675 in SImode and DImode */
676 2, /* cost of moving SSE register */
677 {2, 2, 8}, /* cost of loading SSE registers
678 in SImode, DImode and TImode */
679 {2, 2, 8}, /* cost of storing SSE registers
680 in SImode, DImode and TImode */
681 6, /* MMX or SSE register to integer */
682 32, /* size of l1 cache. */
683 32, /* size of l2 cache. Some models
684 have integrated l2 cache, but
685 optimizing for k6 is not important
686 enough to worry about that. */
687 32, /* size of prefetch block */
688 1, /* number of parallel prefetches */
689 1, /* Branch cost */
690 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
691 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
692 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
693 COSTS_N_INSNS (2), /* cost of FABS instruction. */
694 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
695 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
696 k6_memcpy,
697 k6_memset,
698 1, /* scalar_stmt_cost. */
699 1, /* scalar load_cost. */
700 1, /* scalar_store_cost. */
701 1, /* vec_stmt_cost. */
702 1, /* vec_to_scalar_cost. */
703 1, /* scalar_to_vec_cost. */
704 1, /* vec_align_load_cost. */
705 2, /* vec_unalign_load_cost. */
706 1, /* vec_store_cost. */
707 3, /* cond_taken_branch_cost. */
708 1, /* cond_not_taken_branch_cost. */
709 };
710
711 /* For some reason, Athlon deals better with the REP prefix (relative to
712    loops) than K8 does.  Alignment becomes important after 8 bytes for
713    memcpy and 128 bytes for memset.  */
714 static stringop_algs athlon_memcpy[2] = {
715 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
716 DUMMY_STRINGOP_ALGS};
717 static stringop_algs athlon_memset[2] = {
718 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
719 DUMMY_STRINGOP_ALGS};
720 static const
721 struct processor_costs athlon_cost = {
722 COSTS_N_INSNS (1), /* cost of an add instruction */
723 COSTS_N_INSNS (2), /* cost of a lea instruction */
724 COSTS_N_INSNS (1), /* variable shift costs */
725 COSTS_N_INSNS (1), /* constant shift costs */
726 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
727 COSTS_N_INSNS (5), /* HI */
728 COSTS_N_INSNS (5), /* SI */
729 COSTS_N_INSNS (5), /* DI */
730 COSTS_N_INSNS (5)}, /* other */
731 0, /* cost of multiply per each bit set */
732 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
733 COSTS_N_INSNS (26), /* HI */
734 COSTS_N_INSNS (42), /* SI */
735 COSTS_N_INSNS (74), /* DI */
736 COSTS_N_INSNS (74)}, /* other */
737 COSTS_N_INSNS (1), /* cost of movsx */
738 COSTS_N_INSNS (1), /* cost of movzx */
739 8, /* "large" insn */
740 9, /* MOVE_RATIO */
741 4, /* cost for loading QImode using movzbl */
742 {3, 4, 3}, /* cost of loading integer registers
743 in QImode, HImode and SImode.
744 Relative to reg-reg move (2). */
745 {3, 4, 3}, /* cost of storing integer registers */
746 4, /* cost of reg,reg fld/fst */
747 {4, 4, 12}, /* cost of loading fp registers
748 in SFmode, DFmode and XFmode */
749 {6, 6, 8}, /* cost of storing fp registers
750 in SFmode, DFmode and XFmode */
751 2, /* cost of moving MMX register */
752 {4, 4}, /* cost of loading MMX registers
753 in SImode and DImode */
754 {4, 4}, /* cost of storing MMX registers
755 in SImode and DImode */
756 2, /* cost of moving SSE register */
757 {4, 4, 6}, /* cost of loading SSE registers
758 in SImode, DImode and TImode */
759 {4, 4, 5}, /* cost of storing SSE registers
760 in SImode, DImode and TImode */
761 5, /* MMX or SSE register to integer */
762 64, /* size of l1 cache. */
763 256, /* size of l2 cache. */
764 64, /* size of prefetch block */
765 6, /* number of parallel prefetches */
766 5, /* Branch cost */
767 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (2), /* cost of FABS instruction. */
771 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
773 athlon_memcpy,
774 athlon_memset,
775 1, /* scalar_stmt_cost. */
776 1, /* scalar load_cost. */
777 1, /* scalar_store_cost. */
778 1, /* vec_stmt_cost. */
779 1, /* vec_to_scalar_cost. */
780 1, /* scalar_to_vec_cost. */
781 1, /* vec_align_load_cost. */
782 2, /* vec_unalign_load_cost. */
783 1, /* vec_store_cost. */
784 3, /* cond_taken_branch_cost. */
785 1, /* cond_not_taken_branch_cost. */
786 };
787
788 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
789    small blocks it is better to use a loop.  For large blocks, a libcall can
790    do non-temporal accesses and beat inline expansion considerably.  */
791 static stringop_algs k8_memcpy[2] = {
792 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
793 {-1, rep_prefix_4_byte, false}}},
794 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
795 {-1, libcall, false}}}};
796 static stringop_algs k8_memset[2] = {
797 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
798 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
799 {libcall, {{48, unrolled_loop, false},
800 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
801 static const
802 struct processor_costs k8_cost = {
803 COSTS_N_INSNS (1), /* cost of an add instruction */
804 COSTS_N_INSNS (2), /* cost of a lea instruction */
805 COSTS_N_INSNS (1), /* variable shift costs */
806 COSTS_N_INSNS (1), /* constant shift costs */
807 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
808 COSTS_N_INSNS (4), /* HI */
809 COSTS_N_INSNS (3), /* SI */
810 COSTS_N_INSNS (4), /* DI */
811 COSTS_N_INSNS (5)}, /* other */
812 0, /* cost of multiply per each bit set */
813 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
814 COSTS_N_INSNS (26), /* HI */
815 COSTS_N_INSNS (42), /* SI */
816 COSTS_N_INSNS (74), /* DI */
817 COSTS_N_INSNS (74)}, /* other */
818 COSTS_N_INSNS (1), /* cost of movsx */
819 COSTS_N_INSNS (1), /* cost of movzx */
820 8, /* "large" insn */
821 9, /* MOVE_RATIO */
822 4, /* cost for loading QImode using movzbl */
823 {3, 4, 3}, /* cost of loading integer registers
824 in QImode, HImode and SImode.
825 Relative to reg-reg move (2). */
826 {3, 4, 3}, /* cost of storing integer registers */
827 4, /* cost of reg,reg fld/fst */
828 {4, 4, 12}, /* cost of loading fp registers
829 in SFmode, DFmode and XFmode */
830 {6, 6, 8}, /* cost of storing fp registers
831 in SFmode, DFmode and XFmode */
832 2, /* cost of moving MMX register */
833 {3, 3}, /* cost of loading MMX registers
834 in SImode and DImode */
835 {4, 4}, /* cost of storing MMX registers
836 in SImode and DImode */
837 2, /* cost of moving SSE register */
838 {4, 3, 6}, /* cost of loading SSE registers
839 in SImode, DImode and TImode */
840 {4, 4, 5}, /* cost of storing SSE registers
841 in SImode, DImode and TImode */
842 5, /* MMX or SSE register to integer */
843 64, /* size of l1 cache. */
844 512, /* size of l2 cache. */
845 64, /* size of prefetch block */
846 /* New AMD processors never drop prefetches; if they cannot be performed
847    immediately, they are queued.  We set the number of simultaneous
848    prefetches to a large constant to reflect this (leaving the number of
849    prefetches completely unlimited is probably not a good idea either, as
850    their execution also takes some time).  */
851 100, /* number of parallel prefetches */
852 3, /* Branch cost */
853 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
854 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
855 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
856 COSTS_N_INSNS (2), /* cost of FABS instruction. */
857 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
858 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
859
860 k8_memcpy,
861 k8_memset,
862 4, /* scalar_stmt_cost. */
863 2, /* scalar load_cost. */
864 2, /* scalar_store_cost. */
865 5, /* vec_stmt_cost. */
866 0, /* vec_to_scalar_cost. */
867 2, /* scalar_to_vec_cost. */
868 2, /* vec_align_load_cost. */
869 3, /* vec_unalign_load_cost. */
870 3, /* vec_store_cost. */
871 3, /* cond_taken_branch_cost. */
872 2, /* cond_not_taken_branch_cost. */
873 };
874
875 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
876    very small blocks it is better to use a loop.  For large blocks, a libcall
877    can do non-temporal accesses and beat inline expansion considerably.  */
878 static stringop_algs amdfam10_memcpy[2] = {
879 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
880 {-1, rep_prefix_4_byte, false}}},
881 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
882 {-1, libcall, false}}}};
883 static stringop_algs amdfam10_memset[2] = {
884 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
885 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
886 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
887 {-1, libcall, false}}}};
888 struct processor_costs amdfam10_cost = {
889 COSTS_N_INSNS (1), /* cost of an add instruction */
890 COSTS_N_INSNS (2), /* cost of a lea instruction */
891 COSTS_N_INSNS (1), /* variable shift costs */
892 COSTS_N_INSNS (1), /* constant shift costs */
893 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
894 COSTS_N_INSNS (4), /* HI */
895 COSTS_N_INSNS (3), /* SI */
896 COSTS_N_INSNS (4), /* DI */
897 COSTS_N_INSNS (5)}, /* other */
898 0, /* cost of multiply per each bit set */
899 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
900 COSTS_N_INSNS (35), /* HI */
901 COSTS_N_INSNS (51), /* SI */
902 COSTS_N_INSNS (83), /* DI */
903 COSTS_N_INSNS (83)}, /* other */
904 COSTS_N_INSNS (1), /* cost of movsx */
905 COSTS_N_INSNS (1), /* cost of movzx */
906 8, /* "large" insn */
907 9, /* MOVE_RATIO */
908 4, /* cost for loading QImode using movzbl */
909 {3, 4, 3}, /* cost of loading integer registers
910 in QImode, HImode and SImode.
911 Relative to reg-reg move (2). */
912 {3, 4, 3}, /* cost of storing integer registers */
913 4, /* cost of reg,reg fld/fst */
914 {4, 4, 12}, /* cost of loading fp registers
915 in SFmode, DFmode and XFmode */
916 {6, 6, 8}, /* cost of storing fp registers
917 in SFmode, DFmode and XFmode */
918 2, /* cost of moving MMX register */
919 {3, 3}, /* cost of loading MMX registers
920 in SImode and DImode */
921 {4, 4}, /* cost of storing MMX registers
922 in SImode and DImode */
923 2, /* cost of moving SSE register */
924 {4, 4, 3}, /* cost of loading SSE registers
925 in SImode, DImode and TImode */
926 {4, 4, 5}, /* cost of storing SSE registers
927 in SImode, DImode and TImode */
928 3, /* MMX or SSE register to integer */
929 /* On K8:
930 MOVD reg64, xmmreg Double FSTORE 4
931 MOVD reg32, xmmreg Double FSTORE 4
932 On AMDFAM10:
933 MOVD reg64, xmmreg Double FADD 3
934 1/1 1/1
935 MOVD reg32, xmmreg Double FADD 3
936 1/1 1/1 */
937 64, /* size of l1 cache. */
938 512, /* size of l2 cache. */
939 64, /* size of prefetch block */
940 /* New AMD processors never drop prefetches; if they cannot be performed
941    immediately, they are queued.  We set the number of simultaneous
942    prefetches to a large constant to reflect this (leaving the number of
943    prefetches completely unlimited is probably not a good idea either, as
944    their execution also takes some time).  */
945 100, /* number of parallel prefetches */
946 2, /* Branch cost */
947 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
948 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
949 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
950 COSTS_N_INSNS (2), /* cost of FABS instruction. */
951 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
952 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
953
954 amdfam10_memcpy,
955 amdfam10_memset,
956 4, /* scalar_stmt_cost. */
957 2, /* scalar load_cost. */
958 2, /* scalar_store_cost. */
959 6, /* vec_stmt_cost. */
960 0, /* vec_to_scalar_cost. */
961 2, /* scalar_to_vec_cost. */
962 2, /* vec_align_load_cost. */
963 2, /* vec_unalign_load_cost. */
964 2, /* vec_store_cost. */
965 2, /* cond_taken_branch_cost. */
966 1, /* cond_not_taken_branch_cost. */
967 };
968
969 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
970    very small blocks it is better to use a loop.  For large blocks, a libcall
971    can do non-temporal accesses and beat inline expansion considerably.  */
972 static stringop_algs bdver1_memcpy[2] = {
973 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
974 {-1, rep_prefix_4_byte, false}}},
975 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
976 {-1, libcall, false}}}};
977 static stringop_algs bdver1_memset[2] = {
978 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
979 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
980 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
981 {-1, libcall, false}}}};
982
983 const struct processor_costs bdver1_cost = {
984 COSTS_N_INSNS (1), /* cost of an add instruction */
985 COSTS_N_INSNS (1), /* cost of a lea instruction */
986 COSTS_N_INSNS (1), /* variable shift costs */
987 COSTS_N_INSNS (1), /* constant shift costs */
988 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
989 COSTS_N_INSNS (4), /* HI */
990 COSTS_N_INSNS (4), /* SI */
991 COSTS_N_INSNS (6), /* DI */
992 COSTS_N_INSNS (6)}, /* other */
993 0, /* cost of multiply per each bit set */
994 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
995 COSTS_N_INSNS (35), /* HI */
996 COSTS_N_INSNS (51), /* SI */
997 COSTS_N_INSNS (83), /* DI */
998 COSTS_N_INSNS (83)}, /* other */
999 COSTS_N_INSNS (1), /* cost of movsx */
1000 COSTS_N_INSNS (1), /* cost of movzx */
1001 8, /* "large" insn */
1002 9, /* MOVE_RATIO */
1003 4, /* cost for loading QImode using movzbl */
1004 {5, 5, 4}, /* cost of loading integer registers
1005 in QImode, HImode and SImode.
1006 Relative to reg-reg move (2). */
1007 {4, 4, 4}, /* cost of storing integer registers */
1008 2, /* cost of reg,reg fld/fst */
1009 {5, 5, 12}, /* cost of loading fp registers
1010 in SFmode, DFmode and XFmode */
1011 {4, 4, 8}, /* cost of storing fp registers
1012 in SFmode, DFmode and XFmode */
1013 2, /* cost of moving MMX register */
1014 {4, 4}, /* cost of loading MMX registers
1015 in SImode and DImode */
1016 {4, 4}, /* cost of storing MMX registers
1017 in SImode and DImode */
1018 2, /* cost of moving SSE register */
1019 {4, 4, 4}, /* cost of loading SSE registers
1020 in SImode, DImode and TImode */
1021 {4, 4, 4}, /* cost of storing SSE registers
1022 in SImode, DImode and TImode */
1023 2, /* MMX or SSE register to integer */
1024 /* On K8:
1025 MOVD reg64, xmmreg Double FSTORE 4
1026 MOVD reg32, xmmreg Double FSTORE 4
1027 On AMDFAM10:
1028 MOVD reg64, xmmreg Double FADD 3
1029 1/1 1/1
1030 MOVD reg32, xmmreg Double FADD 3
1031 1/1 1/1 */
1032 16, /* size of l1 cache. */
1033 2048, /* size of l2 cache. */
1034 64, /* size of prefetch block */
1035 /* New AMD processors never drop prefetches; if they cannot be performed
1036    immediately, they are queued.  We set the number of simultaneous
1037    prefetches to a large constant to reflect this (leaving the number of
1038    prefetches completely unlimited is probably not a good idea either, as
1039    their execution also takes some time).  */
1040 100, /* number of parallel prefetches */
1041 2, /* Branch cost */
1042 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1043 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1044 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1045 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1046 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1047 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1048
1049 bdver1_memcpy,
1050 bdver1_memset,
1051 6, /* scalar_stmt_cost. */
1052 4, /* scalar load_cost. */
1053 4, /* scalar_store_cost. */
1054 6, /* vec_stmt_cost. */
1055 0, /* vec_to_scalar_cost. */
1056 2, /* scalar_to_vec_cost. */
1057 4, /* vec_align_load_cost. */
1058 4, /* vec_unalign_load_cost. */
1059 4, /* vec_store_cost. */
1060 4, /* cond_taken_branch_cost. */
1061 2, /* cond_not_taken_branch_cost. */
1062 };
1063
1064 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1065    very small blocks it is better to use a loop.  For large blocks, a libcall
1066    can do non-temporal accesses and beat inline expansion considerably.  */
1067
1068 static stringop_algs bdver2_memcpy[2] = {
1069 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1070 {-1, rep_prefix_4_byte, false}}},
1071 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1072 {-1, libcall, false}}}};
1073 static stringop_algs bdver2_memset[2] = {
1074 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1075 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1076 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1077 {-1, libcall, false}}}};
1078
1079 const struct processor_costs bdver2_cost = {
1080 COSTS_N_INSNS (1), /* cost of an add instruction */
1081 COSTS_N_INSNS (1), /* cost of a lea instruction */
1082 COSTS_N_INSNS (1), /* variable shift costs */
1083 COSTS_N_INSNS (1), /* constant shift costs */
1084 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1085 COSTS_N_INSNS (4), /* HI */
1086 COSTS_N_INSNS (4), /* SI */
1087 COSTS_N_INSNS (6), /* DI */
1088 COSTS_N_INSNS (6)}, /* other */
1089 0, /* cost of multiply per each bit set */
1090 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1091 COSTS_N_INSNS (35), /* HI */
1092 COSTS_N_INSNS (51), /* SI */
1093 COSTS_N_INSNS (83), /* DI */
1094 COSTS_N_INSNS (83)}, /* other */
1095 COSTS_N_INSNS (1), /* cost of movsx */
1096 COSTS_N_INSNS (1), /* cost of movzx */
1097 8, /* "large" insn */
1098 9, /* MOVE_RATIO */
1099 4, /* cost for loading QImode using movzbl */
1100 {5, 5, 4}, /* cost of loading integer registers
1101 in QImode, HImode and SImode.
1102 Relative to reg-reg move (2). */
1103 {4, 4, 4}, /* cost of storing integer registers */
1104 2, /* cost of reg,reg fld/fst */
1105 {5, 5, 12}, /* cost of loading fp registers
1106 in SFmode, DFmode and XFmode */
1107 {4, 4, 8}, /* cost of storing fp registers
1108 in SFmode, DFmode and XFmode */
1109 2, /* cost of moving MMX register */
1110 {4, 4}, /* cost of loading MMX registers
1111 in SImode and DImode */
1112 {4, 4}, /* cost of storing MMX registers
1113 in SImode and DImode */
1114 2, /* cost of moving SSE register */
1115 {4, 4, 4}, /* cost of loading SSE registers
1116 in SImode, DImode and TImode */
1117 {4, 4, 4}, /* cost of storing SSE registers
1118 in SImode, DImode and TImode */
1119 2, /* MMX or SSE register to integer */
1120 /* On K8:
1121 MOVD reg64, xmmreg Double FSTORE 4
1122 MOVD reg32, xmmreg Double FSTORE 4
1123 On AMDFAM10:
1124 MOVD reg64, xmmreg Double FADD 3
1125 1/1 1/1
1126 MOVD reg32, xmmreg Double FADD 3
1127 1/1 1/1 */
1128 16, /* size of l1 cache. */
1129 2048, /* size of l2 cache. */
1130 64, /* size of prefetch block */
1131 /* New AMD processors never drop prefetches; if they cannot be performed
1132    immediately, they are queued.  We set the number of simultaneous
1133    prefetches to a large constant to reflect this (leaving the number of
1134    prefetches completely unlimited is probably not a good idea either, as
1135    their execution also takes some time).  */
1136 100, /* number of parallel prefetches */
1137 2, /* Branch cost */
1138 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1139 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1140 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1141 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1142 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1143 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1144
1145 bdver2_memcpy,
1146 bdver2_memset,
1147 6, /* scalar_stmt_cost. */
1148 4, /* scalar load_cost. */
1149 4, /* scalar_store_cost. */
1150 6, /* vec_stmt_cost. */
1151 0, /* vec_to_scalar_cost. */
1152 2, /* scalar_to_vec_cost. */
1153 4, /* vec_align_load_cost. */
1154 4, /* vec_unalign_load_cost. */
1155 4, /* vec_store_cost. */
1156 4, /* cond_taken_branch_cost. */
1157 2, /* cond_not_taken_branch_cost. */
1158 };
1159
1160
1161 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1162    very small blocks it is better to use a loop.  For large blocks, a libcall
1163    can do non-temporal accesses and beat inline expansion considerably.  */
1164 static stringop_algs bdver3_memcpy[2] = {
1165 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1166 {-1, rep_prefix_4_byte, false}}},
1167 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1168 {-1, libcall, false}}}};
1169 static stringop_algs bdver3_memset[2] = {
1170 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1171 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1172 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1173 {-1, libcall, false}}}};
1174 struct processor_costs bdver3_cost = {
1175 COSTS_N_INSNS (1), /* cost of an add instruction */
1176 COSTS_N_INSNS (1), /* cost of a lea instruction */
1177 COSTS_N_INSNS (1), /* variable shift costs */
1178 COSTS_N_INSNS (1), /* constant shift costs */
1179 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1180 COSTS_N_INSNS (4), /* HI */
1181 COSTS_N_INSNS (4), /* SI */
1182 COSTS_N_INSNS (6), /* DI */
1183 COSTS_N_INSNS (6)}, /* other */
1184 0, /* cost of multiply per each bit set */
1185 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1186 COSTS_N_INSNS (35), /* HI */
1187 COSTS_N_INSNS (51), /* SI */
1188 COSTS_N_INSNS (83), /* DI */
1189 COSTS_N_INSNS (83)}, /* other */
1190 COSTS_N_INSNS (1), /* cost of movsx */
1191 COSTS_N_INSNS (1), /* cost of movzx */
1192 8, /* "large" insn */
1193 9, /* MOVE_RATIO */
1194 4, /* cost for loading QImode using movzbl */
1195 {5, 5, 4}, /* cost of loading integer registers
1196 in QImode, HImode and SImode.
1197 Relative to reg-reg move (2). */
1198 {4, 4, 4}, /* cost of storing integer registers */
1199 2, /* cost of reg,reg fld/fst */
1200 {5, 5, 12}, /* cost of loading fp registers
1201 in SFmode, DFmode and XFmode */
1202 {4, 4, 8}, /* cost of storing fp registers
1203 in SFmode, DFmode and XFmode */
1204 2, /* cost of moving MMX register */
1205 {4, 4}, /* cost of loading MMX registers
1206 in SImode and DImode */
1207 {4, 4}, /* cost of storing MMX registers
1208 in SImode and DImode */
1209 2, /* cost of moving SSE register */
1210 {4, 4, 4}, /* cost of loading SSE registers
1211 in SImode, DImode and TImode */
1212 {4, 4, 4}, /* cost of storing SSE registers
1213 in SImode, DImode and TImode */
1214 2, /* MMX or SSE register to integer */
1215 16, /* size of l1 cache. */
1216 2048, /* size of l2 cache. */
1217 64, /* size of prefetch block */
1218 /* New AMD processors never drop prefetches; if they cannot be performed
1219    immediately, they are queued.  We set the number of simultaneous
1220    prefetches to a large constant to reflect this (leaving the number of
1221    prefetches completely unlimited is probably not a good idea either, as
1222    their execution also takes some time).  */
1223 100, /* number of parallel prefetches */
1224 2, /* Branch cost */
1225 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1226 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1227 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1228 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1229 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1230 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1231
1232 bdver3_memcpy,
1233 bdver3_memset,
1234 6, /* scalar_stmt_cost. */
1235 4, /* scalar load_cost. */
1236 4, /* scalar_store_cost. */
1237 6, /* vec_stmt_cost. */
1238 0, /* vec_to_scalar_cost. */
1239 2, /* scalar_to_vec_cost. */
1240 4, /* vec_align_load_cost. */
1241 4, /* vec_unalign_load_cost. */
1242 4, /* vec_store_cost. */
1243 4, /* cond_taken_branch_cost. */
1244 2, /* cond_not_taken_branch_cost. */
1245 };
1246
1247 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1248    very small blocks it is better to use a loop.  For large blocks, a libcall
1249    can do non-temporal accesses and beat inline expansion considerably.  */
1250 static stringop_algs bdver4_memcpy[2] = {
1251 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1252 {-1, rep_prefix_4_byte, false}}},
1253 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1254 {-1, libcall, false}}}};
1255 static stringop_algs bdver4_memset[2] = {
1256 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1257 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1258 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1259 {-1, libcall, false}}}};
1260 struct processor_costs bdver4_cost = {
1261 COSTS_N_INSNS (1), /* cost of an add instruction */
1262 COSTS_N_INSNS (1), /* cost of a lea instruction */
1263 COSTS_N_INSNS (1), /* variable shift costs */
1264 COSTS_N_INSNS (1), /* constant shift costs */
1265 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1266 COSTS_N_INSNS (4), /* HI */
1267 COSTS_N_INSNS (4), /* SI */
1268 COSTS_N_INSNS (6), /* DI */
1269 COSTS_N_INSNS (6)}, /* other */
1270 0, /* cost of multiply per each bit set */
1271 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1272 COSTS_N_INSNS (35), /* HI */
1273 COSTS_N_INSNS (51), /* SI */
1274 COSTS_N_INSNS (83), /* DI */
1275 COSTS_N_INSNS (83)}, /* other */
1276 COSTS_N_INSNS (1), /* cost of movsx */
1277 COSTS_N_INSNS (1), /* cost of movzx */
1278 8, /* "large" insn */
1279 9, /* MOVE_RATIO */
1280 4, /* cost for loading QImode using movzbl */
1281 {5, 5, 4}, /* cost of loading integer registers
1282 in QImode, HImode and SImode.
1283 Relative to reg-reg move (2). */
1284 {4, 4, 4}, /* cost of storing integer registers */
1285 2, /* cost of reg,reg fld/fst */
1286 {5, 5, 12}, /* cost of loading fp registers
1287 in SFmode, DFmode and XFmode */
1288 {4, 4, 8}, /* cost of storing fp registers
1289 in SFmode, DFmode and XFmode */
1290 2, /* cost of moving MMX register */
1291 {4, 4}, /* cost of loading MMX registers
1292 in SImode and DImode */
1293 {4, 4}, /* cost of storing MMX registers
1294 in SImode and DImode */
1295 2, /* cost of moving SSE register */
1296 {4, 4, 4}, /* cost of loading SSE registers
1297 in SImode, DImode and TImode */
1298 {4, 4, 4}, /* cost of storing SSE registers
1299 in SImode, DImode and TImode */
1300 2, /* MMX or SSE register to integer */
1301 16, /* size of l1 cache. */
1302 2048, /* size of l2 cache. */
1303 64, /* size of prefetch block */
1304 /* New AMD processors never drop prefetches; if they cannot be performed
1305    immediately, they are queued.  We set the number of simultaneous
1306    prefetches to a large constant to reflect this (leaving the number of
1307    prefetches completely unlimited is probably not a good idea either, as
1308    their execution also takes some time).  */
1309 100, /* number of parallel prefetches */
1310 2, /* Branch cost */
1311 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1312 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1313 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1314 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1315 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1316 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1317
1318 bdver4_memcpy,
1319 bdver4_memset,
1320 6, /* scalar_stmt_cost. */
1321 4, /* scalar load_cost. */
1322 4, /* scalar_store_cost. */
1323 6, /* vec_stmt_cost. */
1324 0, /* vec_to_scalar_cost. */
1325 2, /* scalar_to_vec_cost. */
1326 4, /* vec_align_load_cost. */
1327 4, /* vec_unalign_load_cost. */
1328 4, /* vec_store_cost. */
1329 4, /* cond_taken_branch_cost. */
1330 2, /* cond_not_taken_branch_cost. */
1331 };
1332
1333
1334 /* ZNVER1 has an optimized REP instruction for medium-sized blocks, but for
1335    very small blocks it is better to use a loop.  For large blocks, a libcall
1336    can do non-temporal accesses and beat inline expansion considerably.  */
1337 static stringop_algs znver1_memcpy[2] = {
1338 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1339 {-1, rep_prefix_4_byte, false}}},
1340 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1341 {-1, libcall, false}}}};
1342 static stringop_algs znver1_memset[2] = {
1343 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1344 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1345 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1346 {-1, libcall, false}}}};
1347 struct processor_costs znver1_cost = {
1348 COSTS_N_INSNS (1), /* cost of an add instruction. */
1349 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1350 COSTS_N_INSNS (1), /* variable shift costs. */
1351 COSTS_N_INSNS (1), /* constant shift costs. */
1352 {COSTS_N_INSNS (4), /* cost of starting multiply for QI. */
1353 COSTS_N_INSNS (4), /* HI. */
1354 COSTS_N_INSNS (4), /* SI. */
1355 COSTS_N_INSNS (6), /* DI. */
1356 COSTS_N_INSNS (6)}, /* other. */
1357 0, /* cost of multiply per each bit
1358 set. */
1359 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1360 COSTS_N_INSNS (35), /* HI. */
1361 COSTS_N_INSNS (51), /* SI. */
1362 COSTS_N_INSNS (83), /* DI. */
1363 COSTS_N_INSNS (83)}, /* other. */
1364 COSTS_N_INSNS (1), /* cost of movsx. */
1365 COSTS_N_INSNS (1), /* cost of movzx. */
1366 8, /* "large" insn. */
1367 9, /* MOVE_RATIO. */
1368 4, /* cost for loading QImode using
1369 movzbl. */
1370 {5, 5, 4}, /* cost of loading integer registers
1371 in QImode, HImode and SImode.
1372 Relative to reg-reg move (2). */
1373 {4, 4, 4}, /* cost of storing integer
1374 registers. */
1375 2, /* cost of reg,reg fld/fst. */
1376 {5, 5, 12}, /* cost of loading fp registers
1377 in SFmode, DFmode and XFmode. */
1378 {4, 4, 8}, /* cost of storing fp registers
1379 in SFmode, DFmode and XFmode. */
1380 2, /* cost of moving MMX register. */
1381 {4, 4}, /* cost of loading MMX registers
1382 in SImode and DImode. */
1383 {4, 4}, /* cost of storing MMX registers
1384 in SImode and DImode. */
1385 2, /* cost of moving SSE register. */
1386 {4, 4, 4}, /* cost of loading SSE registers
1387 in SImode, DImode and TImode. */
1388 {4, 4, 4}, /* cost of storing SSE registers
1389 in SImode, DImode and TImode. */
1390 2, /* MMX or SSE register to integer. */
1391 32, /* size of l1 cache. */
1392 512, /* size of l2 cache. */
1393 64, /* size of prefetch block. */
1394 /* New AMD processors never drop prefetches; if they cannot be performed
1395    immediately, they are queued.  We set the number of simultaneous
1396    prefetches to a large constant to reflect this (leaving the number of
1397    prefetches completely unlimited is probably not a good idea either, as
1398    their execution also takes some time).  */
1399 100, /* number of parallel prefetches. */
1400 2, /* Branch cost. */
1401 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1402 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1403 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1404 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1405 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1406 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1407
1408 znver1_memcpy,
1409 znver1_memset,
1410 6, /* scalar_stmt_cost. */
1411 4, /* scalar load_cost. */
1412 4, /* scalar_store_cost. */
1413 6, /* vec_stmt_cost. */
1414 0, /* vec_to_scalar_cost. */
1415 2, /* scalar_to_vec_cost. */
1416 4, /* vec_align_load_cost. */
1417 4, /* vec_unalign_load_cost. */
1418 4, /* vec_store_cost. */
1419 4, /* cond_taken_branch_cost. */
1420 2, /* cond_not_taken_branch_cost. */
1421 };
1422
1423 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1424 very small blocks it is better to use a loop.  For large blocks, a libcall
1425 can do non-temporal accesses and beat inline code considerably.  */
1426 static stringop_algs btver1_memcpy[2] = {
1427 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1428 {-1, rep_prefix_4_byte, false}}},
1429 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1430 {-1, libcall, false}}}};
1431 static stringop_algs btver1_memset[2] = {
1432 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1433 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1434 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1435 {-1, libcall, false}}}};
1436 const struct processor_costs btver1_cost = {
1437 COSTS_N_INSNS (1), /* cost of an add instruction */
1438 COSTS_N_INSNS (2), /* cost of a lea instruction */
1439 COSTS_N_INSNS (1), /* variable shift costs */
1440 COSTS_N_INSNS (1), /* constant shift costs */
1441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1442 COSTS_N_INSNS (4), /* HI */
1443 COSTS_N_INSNS (3), /* SI */
1444 COSTS_N_INSNS (4), /* DI */
1445 COSTS_N_INSNS (5)}, /* other */
1446 0, /* cost of multiply per each bit set */
1447 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1448 COSTS_N_INSNS (35), /* HI */
1449 COSTS_N_INSNS (51), /* SI */
1450 COSTS_N_INSNS (83), /* DI */
1451 COSTS_N_INSNS (83)}, /* other */
1452 COSTS_N_INSNS (1), /* cost of movsx */
1453 COSTS_N_INSNS (1), /* cost of movzx */
1454 8, /* "large" insn */
1455 9, /* MOVE_RATIO */
1456 4, /* cost for loading QImode using movzbl */
1457 {3, 4, 3}, /* cost of loading integer registers
1458 in QImode, HImode and SImode.
1459 Relative to reg-reg move (2). */
1460 {3, 4, 3}, /* cost of storing integer registers */
1461 4, /* cost of reg,reg fld/fst */
1462 {4, 4, 12}, /* cost of loading fp registers
1463 in SFmode, DFmode and XFmode */
1464 {6, 6, 8}, /* cost of storing fp registers
1465 in SFmode, DFmode and XFmode */
1466 2, /* cost of moving MMX register */
1467 {3, 3}, /* cost of loading MMX registers
1468 in SImode and DImode */
1469 {4, 4}, /* cost of storing MMX registers
1470 in SImode and DImode */
1471 2, /* cost of moving SSE register */
1472 {4, 4, 3}, /* cost of loading SSE registers
1473 in SImode, DImode and TImode */
1474 {4, 4, 5}, /* cost of storing SSE registers
1475 in SImode, DImode and TImode */
1476 3, /* MMX or SSE register to integer */
1477 /* On K8:
1478 MOVD reg64, xmmreg Double FSTORE 4
1479 MOVD reg32, xmmreg Double FSTORE 4
1480 On AMDFAM10:
1481 MOVD reg64, xmmreg Double FADD 3
1482 1/1 1/1
1483 MOVD reg32, xmmreg Double FADD 3
1484 1/1 1/1 */
1485 32, /* size of l1 cache. */
1486 512, /* size of l2 cache. */
1487 64, /* size of prefetch block */
1488 100, /* number of parallel prefetches */
1489 2, /* Branch cost */
1490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1492 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1496
1497 btver1_memcpy,
1498 btver1_memset,
1499 4, /* scalar_stmt_cost. */
1500 2, /* scalar load_cost. */
1501 2, /* scalar_store_cost. */
1502 6, /* vec_stmt_cost. */
1503 0, /* vec_to_scalar_cost. */
1504 2, /* scalar_to_vec_cost. */
1505 2, /* vec_align_load_cost. */
1506 2, /* vec_unalign_load_cost. */
1507 2, /* vec_store_cost. */
1508 2, /* cond_taken_branch_cost. */
1509 1, /* cond_not_taken_branch_cost. */
1510 };
1511
1512 static stringop_algs btver2_memcpy[2] = {
1513 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1514 {-1, rep_prefix_4_byte, false}}},
1515 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1516 {-1, libcall, false}}}};
1517 static stringop_algs btver2_memset[2] = {
1518 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1519 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1520 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1521 {-1, libcall, false}}}};
1522 const struct processor_costs btver2_cost = {
1523 COSTS_N_INSNS (1), /* cost of an add instruction */
1524 COSTS_N_INSNS (2), /* cost of a lea instruction */
1525 COSTS_N_INSNS (1), /* variable shift costs */
1526 COSTS_N_INSNS (1), /* constant shift costs */
1527 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1528 COSTS_N_INSNS (4), /* HI */
1529 COSTS_N_INSNS (3), /* SI */
1530 COSTS_N_INSNS (4), /* DI */
1531 COSTS_N_INSNS (5)}, /* other */
1532 0, /* cost of multiply per each bit set */
1533 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1534 COSTS_N_INSNS (35), /* HI */
1535 COSTS_N_INSNS (51), /* SI */
1536 COSTS_N_INSNS (83), /* DI */
1537 COSTS_N_INSNS (83)}, /* other */
1538 COSTS_N_INSNS (1), /* cost of movsx */
1539 COSTS_N_INSNS (1), /* cost of movzx */
1540 8, /* "large" insn */
1541 9, /* MOVE_RATIO */
1542 4, /* cost for loading QImode using movzbl */
1543 {3, 4, 3}, /* cost of loading integer registers
1544 in QImode, HImode and SImode.
1545 Relative to reg-reg move (2). */
1546 {3, 4, 3}, /* cost of storing integer registers */
1547 4, /* cost of reg,reg fld/fst */
1548 {4, 4, 12}, /* cost of loading fp registers
1549 in SFmode, DFmode and XFmode */
1550 {6, 6, 8}, /* cost of storing fp registers
1551 in SFmode, DFmode and XFmode */
1552 2, /* cost of moving MMX register */
1553 {3, 3}, /* cost of loading MMX registers
1554 in SImode and DImode */
1555 {4, 4}, /* cost of storing MMX registers
1556 in SImode and DImode */
1557 2, /* cost of moving SSE register */
1558 {4, 4, 3}, /* cost of loading SSE registers
1559 in SImode, DImode and TImode */
1560 {4, 4, 5}, /* cost of storing SSE registers
1561 in SImode, DImode and TImode */
1562 3, /* MMX or SSE register to integer */
1563 /* On K8:
1564 MOVD reg64, xmmreg Double FSTORE 4
1565 MOVD reg32, xmmreg Double FSTORE 4
1566 On AMDFAM10:
1567 MOVD reg64, xmmreg Double FADD 3
1568 1/1 1/1
1569 MOVD reg32, xmmreg Double FADD 3
1570 1/1 1/1 */
1571 32, /* size of l1 cache. */
1572 2048, /* size of l2 cache. */
1573 64, /* size of prefetch block */
1574 100, /* number of parallel prefetches */
1575 2, /* Branch cost */
1576 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1577 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1578 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1579 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1580 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1581 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1582 btver2_memcpy,
1583 btver2_memset,
1584 4, /* scalar_stmt_cost. */
1585 2, /* scalar load_cost. */
1586 2, /* scalar_store_cost. */
1587 6, /* vec_stmt_cost. */
1588 0, /* vec_to_scalar_cost. */
1589 2, /* scalar_to_vec_cost. */
1590 2, /* vec_align_load_cost. */
1591 2, /* vec_unalign_load_cost. */
1592 2, /* vec_store_cost. */
1593 2, /* cond_taken_branch_cost. */
1594 1, /* cond_not_taken_branch_cost. */
1595 };
1596
1597 static stringop_algs pentium4_memcpy[2] = {
1598 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1599 DUMMY_STRINGOP_ALGS};
1600 static stringop_algs pentium4_memset[2] = {
1601 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1602 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 DUMMY_STRINGOP_ALGS};
1604
1605 static const
1606 struct processor_costs pentium4_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (3), /* cost of a lea instruction */
1609 COSTS_N_INSNS (4), /* variable shift costs */
1610 COSTS_N_INSNS (4), /* constant shift costs */
1611 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (15), /* HI */
1613 COSTS_N_INSNS (15), /* SI */
1614 COSTS_N_INSNS (15), /* DI */
1615 COSTS_N_INSNS (15)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (56), /* HI */
1619 COSTS_N_INSNS (56), /* SI */
1620 COSTS_N_INSNS (56), /* DI */
1621 COSTS_N_INSNS (56)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 16, /* "large" insn */
1625 6, /* MOVE_RATIO */
1626 2, /* cost for loading QImode using movzbl */
1627 {4, 5, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {2, 3, 2}, /* cost of storing integer registers */
1631 2, /* cost of reg,reg fld/fst */
1632 {2, 2, 6}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {4, 4, 6}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {2, 2}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {2, 2}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 12, /* cost of moving SSE register */
1642 {12, 12, 12}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {2, 2, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 10, /* MMX or SSE register to integer */
1647 8, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 2, /* Branch cost */
1652 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1658 pentium4_memcpy,
1659 pentium4_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1671 };
1672
1673 static stringop_algs nocona_memcpy[2] = {
1674 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1676 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1677
1678 static stringop_algs nocona_memset[2] = {
1679 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1680 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1681 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1682 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1683
1684 static const
1685 struct processor_costs nocona_cost = {
1686 COSTS_N_INSNS (1), /* cost of an add instruction */
1687 COSTS_N_INSNS (1), /* cost of a lea instruction */
1688 COSTS_N_INSNS (1), /* variable shift costs */
1689 COSTS_N_INSNS (1), /* constant shift costs */
1690 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1691 COSTS_N_INSNS (10), /* HI */
1692 COSTS_N_INSNS (10), /* SI */
1693 COSTS_N_INSNS (10), /* DI */
1694 COSTS_N_INSNS (10)}, /* other */
1695 0, /* cost of multiply per each bit set */
1696 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1697 COSTS_N_INSNS (66), /* HI */
1698 COSTS_N_INSNS (66), /* SI */
1699 COSTS_N_INSNS (66), /* DI */
1700 COSTS_N_INSNS (66)}, /* other */
1701 COSTS_N_INSNS (1), /* cost of movsx */
1702 COSTS_N_INSNS (1), /* cost of movzx */
1703 16, /* "large" insn */
1704 17, /* MOVE_RATIO */
1705 4, /* cost for loading QImode using movzbl */
1706 {4, 4, 4}, /* cost of loading integer registers
1707 in QImode, HImode and SImode.
1708 Relative to reg-reg move (2). */
1709 {4, 4, 4}, /* cost of storing integer registers */
1710 3, /* cost of reg,reg fld/fst */
1711 {12, 12, 12}, /* cost of loading fp registers
1712 in SFmode, DFmode and XFmode */
1713 {4, 4, 4}, /* cost of storing fp registers
1714 in SFmode, DFmode and XFmode */
1715 6, /* cost of moving MMX register */
1716 {12, 12}, /* cost of loading MMX registers
1717 in SImode and DImode */
1718 {12, 12}, /* cost of storing MMX registers
1719 in SImode and DImode */
1720 6, /* cost of moving SSE register */
1721 {12, 12, 12}, /* cost of loading SSE registers
1722 in SImode, DImode and TImode */
1723 {12, 12, 12}, /* cost of storing SSE registers
1724 in SImode, DImode and TImode */
1725 8, /* MMX or SSE register to integer */
1726 8, /* size of l1 cache. */
1727 1024, /* size of l2 cache. */
1728 64, /* size of prefetch block */
1729 8, /* number of parallel prefetches */
1730 1, /* Branch cost */
1731 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1732 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1733 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1734 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1735 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1736 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1737 nocona_memcpy,
1738 nocona_memset,
1739 1, /* scalar_stmt_cost. */
1740 1, /* scalar load_cost. */
1741 1, /* scalar_store_cost. */
1742 1, /* vec_stmt_cost. */
1743 1, /* vec_to_scalar_cost. */
1744 1, /* scalar_to_vec_cost. */
1745 1, /* vec_align_load_cost. */
1746 2, /* vec_unalign_load_cost. */
1747 1, /* vec_store_cost. */
1748 3, /* cond_taken_branch_cost. */
1749 1, /* cond_not_taken_branch_cost. */
1750 };
1751
1752 static stringop_algs atom_memcpy[2] = {
1753 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1754 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1756 static stringop_algs atom_memset[2] = {
1757 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1758 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1759 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1760 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1761 static const
1762 struct processor_costs atom_cost = {
1763 COSTS_N_INSNS (1), /* cost of an add instruction */
1764 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1765 COSTS_N_INSNS (1), /* variable shift costs */
1766 COSTS_N_INSNS (1), /* constant shift costs */
1767 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1768 COSTS_N_INSNS (4), /* HI */
1769 COSTS_N_INSNS (3), /* SI */
1770 COSTS_N_INSNS (4), /* DI */
1771 COSTS_N_INSNS (2)}, /* other */
1772 0, /* cost of multiply per each bit set */
1773 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1774 COSTS_N_INSNS (26), /* HI */
1775 COSTS_N_INSNS (42), /* SI */
1776 COSTS_N_INSNS (74), /* DI */
1777 COSTS_N_INSNS (74)}, /* other */
1778 COSTS_N_INSNS (1), /* cost of movsx */
1779 COSTS_N_INSNS (1), /* cost of movzx */
1780 8, /* "large" insn */
1781 17, /* MOVE_RATIO */
1782 4, /* cost for loading QImode using movzbl */
1783 {4, 4, 4}, /* cost of loading integer registers
1784 in QImode, HImode and SImode.
1785 Relative to reg-reg move (2). */
1786 {4, 4, 4}, /* cost of storing integer registers */
1787 4, /* cost of reg,reg fld/fst */
1788 {12, 12, 12}, /* cost of loading fp registers
1789 in SFmode, DFmode and XFmode */
1790 {6, 6, 8}, /* cost of storing fp registers
1791 in SFmode, DFmode and XFmode */
1792 2, /* cost of moving MMX register */
1793 {8, 8}, /* cost of loading MMX registers
1794 in SImode and DImode */
1795 {8, 8}, /* cost of storing MMX registers
1796 in SImode and DImode */
1797 2, /* cost of moving SSE register */
1798 {8, 8, 8}, /* cost of loading SSE registers
1799 in SImode, DImode and TImode */
1800 {8, 8, 8}, /* cost of storing SSE registers
1801 in SImode, DImode and TImode */
1802 5, /* MMX or SSE register to integer */
1803 32, /* size of l1 cache. */
1804 256, /* size of l2 cache. */
1805 64, /* size of prefetch block */
1806 6, /* number of parallel prefetches */
1807 3, /* Branch cost */
1808 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1809 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1810 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1811 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1812 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1813 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1814 atom_memcpy,
1815 atom_memset,
1816 1, /* scalar_stmt_cost. */
1817 1, /* scalar load_cost. */
1818 1, /* scalar_store_cost. */
1819 1, /* vec_stmt_cost. */
1820 1, /* vec_to_scalar_cost. */
1821 1, /* scalar_to_vec_cost. */
1822 1, /* vec_align_load_cost. */
1823 2, /* vec_unalign_load_cost. */
1824 1, /* vec_store_cost. */
1825 3, /* cond_taken_branch_cost. */
1826 1, /* cond_not_taken_branch_cost. */
1827 };
1828
1829 static stringop_algs slm_memcpy[2] = {
1830 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1831 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1832 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1833 static stringop_algs slm_memset[2] = {
1834 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1835 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1836 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1837 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1838 static const
1839 struct processor_costs slm_cost = {
1840 COSTS_N_INSNS (1), /* cost of an add instruction */
1841 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1842 COSTS_N_INSNS (1), /* variable shift costs */
1843 COSTS_N_INSNS (1), /* constant shift costs */
1844 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1845 COSTS_N_INSNS (3), /* HI */
1846 COSTS_N_INSNS (3), /* SI */
1847 COSTS_N_INSNS (4), /* DI */
1848 COSTS_N_INSNS (2)}, /* other */
1849 0, /* cost of multiply per each bit set */
1850 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1851 COSTS_N_INSNS (26), /* HI */
1852 COSTS_N_INSNS (42), /* SI */
1853 COSTS_N_INSNS (74), /* DI */
1854 COSTS_N_INSNS (74)}, /* other */
1855 COSTS_N_INSNS (1), /* cost of movsx */
1856 COSTS_N_INSNS (1), /* cost of movzx */
1857 8, /* "large" insn */
1858 17, /* MOVE_RATIO */
1859 4, /* cost for loading QImode using movzbl */
1860 {4, 4, 4}, /* cost of loading integer registers
1861 in QImode, HImode and SImode.
1862 Relative to reg-reg move (2). */
1863 {4, 4, 4}, /* cost of storing integer registers */
1864 4, /* cost of reg,reg fld/fst */
1865 {12, 12, 12}, /* cost of loading fp registers
1866 in SFmode, DFmode and XFmode */
1867 {6, 6, 8}, /* cost of storing fp registers
1868 in SFmode, DFmode and XFmode */
1869 2, /* cost of moving MMX register */
1870 {8, 8}, /* cost of loading MMX registers
1871 in SImode and DImode */
1872 {8, 8}, /* cost of storing MMX registers
1873 in SImode and DImode */
1874 2, /* cost of moving SSE register */
1875 {8, 8, 8}, /* cost of loading SSE registers
1876 in SImode, DImode and TImode */
1877 {8, 8, 8}, /* cost of storing SSE registers
1878 in SImode, DImode and TImode */
1879 5, /* MMX or SSE register to integer */
1880 32, /* size of l1 cache. */
1881 256, /* size of l2 cache. */
1882 64, /* size of prefetch block */
1883 6, /* number of parallel prefetches */
1884 3, /* Branch cost */
1885 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1886 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1887 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1888 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1889 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1890 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1891 slm_memcpy,
1892 slm_memset,
1893 1, /* scalar_stmt_cost. */
1894 1, /* scalar load_cost. */
1895 1, /* scalar_store_cost. */
1896 1, /* vec_stmt_cost. */
1897 4, /* vec_to_scalar_cost. */
1898 1, /* scalar_to_vec_cost. */
1899 1, /* vec_align_load_cost. */
1900 2, /* vec_unalign_load_cost. */
1901 1, /* vec_store_cost. */
1902 3, /* cond_taken_branch_cost. */
1903 1, /* cond_not_taken_branch_cost. */
1904 };
1905
1906 static stringop_algs intel_memcpy[2] = {
1907 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1908 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1909 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1910 static stringop_algs intel_memset[2] = {
1911 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1912 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1913 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1914 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1915 static const
1916 struct processor_costs intel_cost = {
1917 COSTS_N_INSNS (1), /* cost of an add instruction */
1918 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1919 COSTS_N_INSNS (1), /* variable shift costs */
1920 COSTS_N_INSNS (1), /* constant shift costs */
1921 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1922 COSTS_N_INSNS (3), /* HI */
1923 COSTS_N_INSNS (3), /* SI */
1924 COSTS_N_INSNS (4), /* DI */
1925 COSTS_N_INSNS (2)}, /* other */
1926 0, /* cost of multiply per each bit set */
1927 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1928 COSTS_N_INSNS (26), /* HI */
1929 COSTS_N_INSNS (42), /* SI */
1930 COSTS_N_INSNS (74), /* DI */
1931 COSTS_N_INSNS (74)}, /* other */
1932 COSTS_N_INSNS (1), /* cost of movsx */
1933 COSTS_N_INSNS (1), /* cost of movzx */
1934 8, /* "large" insn */
1935 17, /* MOVE_RATIO */
1936 4, /* cost for loading QImode using movzbl */
1937 {4, 4, 4}, /* cost of loading integer registers
1938 in QImode, HImode and SImode.
1939 Relative to reg-reg move (2). */
1940 {4, 4, 4}, /* cost of storing integer registers */
1941 4, /* cost of reg,reg fld/fst */
1942 {12, 12, 12}, /* cost of loading fp registers
1943 in SFmode, DFmode and XFmode */
1944 {6, 6, 8}, /* cost of storing fp registers
1945 in SFmode, DFmode and XFmode */
1946 2, /* cost of moving MMX register */
1947 {8, 8}, /* cost of loading MMX registers
1948 in SImode and DImode */
1949 {8, 8}, /* cost of storing MMX registers
1950 in SImode and DImode */
1951 2, /* cost of moving SSE register */
1952 {8, 8, 8}, /* cost of loading SSE registers
1953 in SImode, DImode and TImode */
1954 {8, 8, 8}, /* cost of storing SSE registers
1955 in SImode, DImode and TImode */
1956 5, /* MMX or SSE register to integer */
1957 32, /* size of l1 cache. */
1958 256, /* size of l2 cache. */
1959 64, /* size of prefetch block */
1960 6, /* number of parallel prefetches */
1961 3, /* Branch cost */
1962 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1963 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1964 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1965 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1966 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1967 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1968 intel_memcpy,
1969 intel_memset,
1970 1, /* scalar_stmt_cost. */
1971 1, /* scalar load_cost. */
1972 1, /* scalar_store_cost. */
1973 1, /* vec_stmt_cost. */
1974 4, /* vec_to_scalar_cost. */
1975 1, /* scalar_to_vec_cost. */
1976 1, /* vec_align_load_cost. */
1977 2, /* vec_unalign_load_cost. */
1978 1, /* vec_store_cost. */
1979 3, /* cond_taken_branch_cost. */
1980 1, /* cond_not_taken_branch_cost. */
1981 };
1982
1983 /* Generic should produce code tuned for Core-i7 (and newer chips)
1984 and btver1 (and newer chips). */
1985
1986 static stringop_algs generic_memcpy[2] = {
1987 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1988 {-1, libcall, false}}},
1989 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1990 {-1, libcall, false}}}};
1991 static stringop_algs generic_memset[2] = {
1992 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1993 {-1, libcall, false}}},
1994 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1995 {-1, libcall, false}}}};
1996 static const
1997 struct processor_costs generic_cost = {
1998 COSTS_N_INSNS (1), /* cost of an add instruction */
1999 /* On all chips taken into consideration, lea is 2 cycles or more.  With
2000 this cost, however, our current implementation of synth_mult results in
2001 the use of unnecessary temporary registers, causing regressions on several
2002 SPECfp benchmarks.  */
2003 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2004 COSTS_N_INSNS (1), /* variable shift costs */
2005 COSTS_N_INSNS (1), /* constant shift costs */
2006 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2007 COSTS_N_INSNS (4), /* HI */
2008 COSTS_N_INSNS (3), /* SI */
2009 COSTS_N_INSNS (4), /* DI */
2010 COSTS_N_INSNS (2)}, /* other */
2011 0, /* cost of multiply per each bit set */
2012 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2013 COSTS_N_INSNS (26), /* HI */
2014 COSTS_N_INSNS (42), /* SI */
2015 COSTS_N_INSNS (74), /* DI */
2016 COSTS_N_INSNS (74)}, /* other */
2017 COSTS_N_INSNS (1), /* cost of movsx */
2018 COSTS_N_INSNS (1), /* cost of movzx */
2019 8, /* "large" insn */
2020 17, /* MOVE_RATIO */
2021 4, /* cost for loading QImode using movzbl */
2022 {4, 4, 4}, /* cost of loading integer registers
2023 in QImode, HImode and SImode.
2024 Relative to reg-reg move (2). */
2025 {4, 4, 4}, /* cost of storing integer registers */
2026 4, /* cost of reg,reg fld/fst */
2027 {12, 12, 12}, /* cost of loading fp registers
2028 in SFmode, DFmode and XFmode */
2029 {6, 6, 8}, /* cost of storing fp registers
2030 in SFmode, DFmode and XFmode */
2031 2, /* cost of moving MMX register */
2032 {8, 8}, /* cost of loading MMX registers
2033 in SImode and DImode */
2034 {8, 8}, /* cost of storing MMX registers
2035 in SImode and DImode */
2036 2, /* cost of moving SSE register */
2037 {8, 8, 8}, /* cost of loading SSE registers
2038 in SImode, DImode and TImode */
2039 {8, 8, 8}, /* cost of storing SSE registers
2040 in SImode, DImode and TImode */
2041 5, /* MMX or SSE register to integer */
2042 32, /* size of l1 cache. */
2043 512, /* size of l2 cache. */
2044 64, /* size of prefetch block */
2045 6, /* number of parallel prefetches */
2046 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
2047 value is increased to the perhaps more appropriate value of 5.  */
2048 3, /* Branch cost */
2049 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2050 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2051 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2052 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2053 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2054 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2055 generic_memcpy,
2056 generic_memset,
2057 1, /* scalar_stmt_cost. */
2058 1, /* scalar load_cost. */
2059 1, /* scalar_store_cost. */
2060 1, /* vec_stmt_cost. */
2061 1, /* vec_to_scalar_cost. */
2062 1, /* scalar_to_vec_cost. */
2063 1, /* vec_align_load_cost. */
2064 2, /* vec_unalign_load_cost. */
2065 1, /* vec_store_cost. */
2066 3, /* cond_taken_branch_cost. */
2067 1, /* cond_not_taken_branch_cost. */
2068 };
2069
2070 /* core_cost should produce code tuned for the Core family of CPUs.  */
2071 static stringop_algs core_memcpy[2] = {
2072 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2073 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2074 {-1, libcall, false}}}};
2075 static stringop_algs core_memset[2] = {
2076 {libcall, {{6, loop_1_byte, true},
2077 {24, loop, true},
2078 {8192, rep_prefix_4_byte, true},
2079 {-1, libcall, false}}},
2080 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2081 {-1, libcall, false}}}};
2082
2083 static const
2084 struct processor_costs core_cost = {
2085 COSTS_N_INSNS (1), /* cost of an add instruction */
2086 /* On all chips taken into consideration, lea is 2 cycles or more.  With
2087 this cost, however, our current implementation of synth_mult results in
2088 the use of unnecessary temporary registers, causing regressions on several
2089 SPECfp benchmarks.  */
2090 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2091 COSTS_N_INSNS (1), /* variable shift costs */
2092 COSTS_N_INSNS (1), /* constant shift costs */
2093 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2094 COSTS_N_INSNS (4), /* HI */
2095 COSTS_N_INSNS (3), /* SI */
2096 COSTS_N_INSNS (4), /* DI */
2097 COSTS_N_INSNS (2)}, /* other */
2098 0, /* cost of multiply per each bit set */
2099 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2100 COSTS_N_INSNS (26), /* HI */
2101 COSTS_N_INSNS (42), /* SI */
2102 COSTS_N_INSNS (74), /* DI */
2103 COSTS_N_INSNS (74)}, /* other */
2104 COSTS_N_INSNS (1), /* cost of movsx */
2105 COSTS_N_INSNS (1), /* cost of movzx */
2106 8, /* "large" insn */
2107 17, /* MOVE_RATIO */
2108 4, /* cost for loading QImode using movzbl */
2109 {4, 4, 4}, /* cost of loading integer registers
2110 in QImode, HImode and SImode.
2111 Relative to reg-reg move (2). */
2112 {4, 4, 4}, /* cost of storing integer registers */
2113 4, /* cost of reg,reg fld/fst */
2114 {12, 12, 12}, /* cost of loading fp registers
2115 in SFmode, DFmode and XFmode */
2116 {6, 6, 8}, /* cost of storing fp registers
2117 in SFmode, DFmode and XFmode */
2118 2, /* cost of moving MMX register */
2119 {8, 8}, /* cost of loading MMX registers
2120 in SImode and DImode */
2121 {8, 8}, /* cost of storing MMX registers
2122 in SImode and DImode */
2123 2, /* cost of moving SSE register */
2124 {8, 8, 8}, /* cost of loading SSE registers
2125 in SImode, DImode and TImode */
2126 {8, 8, 8}, /* cost of storing SSE registers
2127 in SImode, DImode and TImode */
2128 5, /* MMX or SSE register to integer */
2129 64, /* size of l1 cache. */
2130 512, /* size of l2 cache. */
2131 64, /* size of prefetch block */
2132 6, /* number of parallel prefetches */
2133 /* FIXME: perhaps a more appropriate value is 5.  */
2134 3, /* Branch cost */
2135 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2136 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2137 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2138 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2139 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2140 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2141 core_memcpy,
2142 core_memset,
2143 1, /* scalar_stmt_cost. */
2144 1, /* scalar load_cost. */
2145 1, /* scalar_store_cost. */
2146 1, /* vec_stmt_cost. */
2147 1, /* vec_to_scalar_cost. */
2148 1, /* scalar_to_vec_cost. */
2149 1, /* vec_align_load_cost. */
2150 2, /* vec_unalign_load_cost. */
2151 1, /* vec_store_cost. */
2152 3, /* cond_taken_branch_cost. */
2153 1, /* cond_not_taken_branch_cost. */
2154 };
2155
2156
2157 /* Set by -mtune. */
2158 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2159
2160 /* Set by -mtune or -Os. */
2161 const struct processor_costs *ix86_cost = &pentium_cost;
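/* A hedged sketch of how these pointers end up set: the option-override
   code later in this file redirects them once -mtune and -Os are known,
   roughly as shown below (simplified; ix86_size_cost is the byte-counting
   table defined earlier, and processor_target_table appears further down
   in this file).  */
#if 0 /* Illustrative sketch; not compiled.  */
  /* Tuning costs always follow -mtune; rtx costs additionally fall back
     to the size-optimized table when optimizing for size.  */
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  ix86_cost = optimize_size ? &ix86_size_cost : ix86_tune_cost;
#endif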
2162
2163 /* Processor feature/optimization bitmasks. */
2164 #define m_386 (1<<PROCESSOR_I386)
2165 #define m_486 (1<<PROCESSOR_I486)
2166 #define m_PENT (1<<PROCESSOR_PENTIUM)
2167 #define m_LAKEMONT (1<<PROCESSOR_LAKEMONT)
2168 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2169 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2170 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2171 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2172 #define m_CORE2 (1<<PROCESSOR_CORE2)
2173 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2174 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2175 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2176 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2177 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2178 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2179 #define m_KNL (1<<PROCESSOR_KNL)
2180 #define m_SKYLAKE_AVX512 (1<<PROCESSOR_SKYLAKE_AVX512)
2181 #define m_INTEL (1<<PROCESSOR_INTEL)
2182
2183 #define m_GEODE (1<<PROCESSOR_GEODE)
2184 #define m_K6 (1<<PROCESSOR_K6)
2185 #define m_K6_GEODE (m_K6 | m_GEODE)
2186 #define m_K8 (1<<PROCESSOR_K8)
2187 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2188 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2189 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2190 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2191 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2192 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2193 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2194 #define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
2195 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2196 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2197 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2198 #define m_BTVER (m_BTVER1 | m_BTVER2)
2199 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
2200 | m_ZNVER1)
2201
2202 #define m_GENERIC (1<<PROCESSOR_GENERIC)
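/* A short illustration of how these masks are meant to be used: each m_*
   macro is the bit for one enum processor_type value, so a tuning selector
   is simply a bitmask that can be tested against the bit of the active
   -mtune CPU.  The snippet below is a sketch only; it relies on ix86_tune,
   which is declared further down in this file.  */
#if 0 /* Illustrative sketch; not compiled.  */
  /* Build the single-bit mask for the current tuning target and test a
     selector against it.  */
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  bool tuned_for_amd = (m_AMD_MULTIPLE & ix86_tune_mask) != 0;
#endif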
2203
2204 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2205 #undef DEF_TUNE
2206 #define DEF_TUNE(tune, name, selector) name,
2207 #include "x86-tune.def"
2208 #undef DEF_TUNE
2209 };
2210
2211 /* Feature tests against the various tunings. */
2212 unsigned char ix86_tune_features[X86_TUNE_LAST];
2213
2214 /* Feature tests against the various tunings used to create ix86_tune_features
2215 based on the processor mask. */
2216 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2217 #undef DEF_TUNE
2218 #define DEF_TUNE(tune, name, selector) selector,
2219 #include "x86-tune.def"
2220 #undef DEF_TUNE
2221 };
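/* To make the two x-macro expansions above concrete: an entry in
   x86-tune.def has the shape shown below (the selector here is
   illustrative, not a quote of the real file).  The first DEF_TUNE
   definition turns it into the string "use_leave" in
   ix86_tune_feature_names; the second turns it into the selector bitmask
   in initial_ix86_tune_features, both at index X86_TUNE_USE_LEAVE.  */
#if 0 /* Illustrative sketch; not compiled.  */
DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
          m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
#endif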
2222
2223 /* Feature tests against the various architecture variations. */
2224 unsigned char ix86_arch_features[X86_ARCH_LAST];
2225
2226 /* Feature tests against the various architecture variations, used to create
2227 ix86_arch_features based on the processor mask. */
2228 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2229 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2230 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2231
2232 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2233 ~m_386,
2234
2235 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2236 ~(m_386 | m_486),
2237
2238 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2239 ~m_386,
2240
2241 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2242 ~m_386,
2243 };
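/* A hedged sketch of how the table above is consumed: the option-override
   code masks each selector with the bit of the active -march CPU to fill
   in ix86_arch_features (ix86_arch is declared further below).  */
#if 0 /* Illustrative sketch; not compiled.  */
  /* One bit per architecture; a feature is available iff the -march CPU's
     bit is present in the selector.  */
  unsigned int ix86_arch_mask = 1u << ix86_arch;
  for (unsigned int i = 0; i < X86_ARCH_LAST; i++)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
#endif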
2244
2245 /* In case the average insn count for a single function invocation is
2246 lower than this constant, emit fast (but longer) prologue and
2247 epilogue code.  */
2248 #define FAST_PROLOGUE_INSN_COUNT 20
2249
2250 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
2251 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2252 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2253 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2254
2255 /* Array of the smallest class containing reg number REGNO, indexed by
2256 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2257
2258 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2259 {
2260 /* ax, dx, cx, bx */
2261 AREG, DREG, CREG, BREG,
2262 /* si, di, bp, sp */
2263 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2264 /* FP registers */
2265 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2266 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2267 /* arg pointer */
2268 NON_Q_REGS,
2269 /* flags, fpsr, fpcr, frame */
2270 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2271 /* SSE registers */
2272 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2273 SSE_REGS, SSE_REGS,
2274 /* MMX registers */
2275 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2276 MMX_REGS, MMX_REGS,
2277 /* REX registers */
2278 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2279 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2280 /* SSE REX registers */
2281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2282 SSE_REGS, SSE_REGS,
2283 /* AVX-512 SSE registers */
2284 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2285 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2286 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2287 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2288 /* Mask registers. */
2289 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2290 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2291 /* MPX bound registers */
2292 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2293 };
2294
2295 /* The "default" register map used in 32bit mode. */
2296
2297 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2298 {
2299 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2300 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2301 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2302 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2303 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2304 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2306 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2307 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2308 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2309 101, 102, 103, 104, /* bound registers */
2310 };
2311
2312 /* The "default" register map used in 64bit mode. */
2313
2314 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2315 {
2316 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2317 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2318 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2319 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2320 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2321 8,9,10,11,12,13,14,15, /* extended integer registers */
2322 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2323 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2324 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2325 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2326 126, 127, 128, 129, /* bound registers */
2327 };
2328
2329 /* Define the register numbers to be used in Dwarf debugging information.
2330 The SVR4 reference port C compiler uses the following register numbers
2331 in its Dwarf output code:
2332 0 for %eax (gcc regno = 0)
2333 1 for %ecx (gcc regno = 2)
2334 2 for %edx (gcc regno = 1)
2335 3 for %ebx (gcc regno = 3)
2336 4 for %esp (gcc regno = 7)
2337 5 for %ebp (gcc regno = 6)
2338 6 for %esi (gcc regno = 4)
2339 7 for %edi (gcc regno = 5)
2340 The following three DWARF register numbers are never generated by
2341 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2342 believes these numbers have these meanings.
2343 8 for %eip (no gcc equivalent)
2344 9 for %eflags (gcc regno = 17)
2345 10 for %trapno (no gcc equivalent)
2346 It is not at all clear how we should number the FP stack registers
2347 for the x86 architecture. If the version of SDB on x86/svr4 were
2348 a bit less brain dead with respect to floating-point then we would
2349 have a precedent to follow with respect to DWARF register numbers
2350 for x86 FP registers, but the SDB on x86/svr4 is so completely
2351 broken with respect to FP registers that it is hardly worth thinking
2352 of it as something to strive for compatibility with.
2353 The version of x86/svr4 SDB I have at the moment does (partially)
2354 seem to believe that DWARF register number 11 is associated with
2355 the x86 register %st(0), but that's about all. Higher DWARF
2356 register numbers don't seem to be associated with anything in
2357 particular, and even for DWARF regno 11, SDB only seems to under-
2358 stand that it should say that a variable lives in %st(0) (when
2359 asked via an `=' command) if we said it was in DWARF regno 11,
2360 but SDB still prints garbage when asked for the value of the
2361 variable in question (via a `/' command).
2362 (Also note that the labels SDB prints for various FP stack regs
2363 when doing an `x' command are all wrong.)
2364 Note that these problems generally don't affect the native SVR4
2365 C compiler because it doesn't allow the use of -O with -g and
2366 because when it is *not* optimizing, it allocates a memory
2367 location for each floating-point variable, and the memory
2368 location is what gets described in the DWARF AT_location
2369 attribute for the variable in question.
2370 Regardless of the severe mental illness of the x86/svr4 SDB, we
2371 do something sensible here and we use the following DWARF
2372 register numbers. Note that these are all stack-top-relative
2373 numbers.
2374 11 for %st(0) (gcc regno = 8)
2375 12 for %st(1) (gcc regno = 9)
2376 13 for %st(2) (gcc regno = 10)
2377 14 for %st(3) (gcc regno = 11)
2378 15 for %st(4) (gcc regno = 12)
2379 16 for %st(5) (gcc regno = 13)
2380 17 for %st(6) (gcc regno = 14)
2381 18 for %st(7) (gcc regno = 15)
2382 */
2383 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2384 {
2385 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2386 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2387 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2388 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2389 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2390 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2391 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2392 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2393 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2394 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2395 101, 102, 103, 104, /* bound registers */
2396 };
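/* A small usage illustration for the three register maps above: they are
   indexed by GCC's internal register number and yield the number to emit
   in debug info, with -1 meaning "no encoding".  For instance, %edx is GCC
   regno 1, and svr4_dbx_register_map[1] is 2, matching the SVR4 numbering
   described in the comment above.  */
#if 0 /* Illustrative sketch; not compiled.  */
  int dwarf_regno = svr4_dbx_register_map[1];   /* 2, i.e. %edx.  */
#endif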
2397
2398 /* Define parameter passing and return registers. */
2399
2400 static int const x86_64_int_parameter_registers[6] =
2401 {
2402 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2403 };
2404
2405 static int const x86_64_ms_abi_int_parameter_registers[4] =
2406 {
2407 CX_REG, DX_REG, R8_REG, R9_REG
2408 };
2409
2410 static int const x86_64_int_return_registers[4] =
2411 {
2412 AX_REG, DX_REG, DI_REG, SI_REG
2413 };
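/* Concretely: for a call f (a, b, c, d) with four integer arguments, the
   SysV ABI passes them in DI, SI, DX, CX while the MS ABI uses CX, DX,
   R8, R9; the first integer return register is AX.  The helper below is a
   sketch with a made-up name, just to show how the two arrays are meant
   to be indexed.  */
#if 0 /* Illustrative sketch; not compiled.  */
static int
illustrate_int_arg_regno (enum calling_abi abi, int n)
{
  /* N is the zero-based position of an integer argument.  */
  return (abi == MS_ABI
          ? x86_64_ms_abi_int_parameter_registers[n]
          : x86_64_int_parameter_registers[n]);
}
#endif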
2414
2415 /* Additional registers clobbered by SysV calls (preserved by the MS ABI).  */
2416
2417 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2418 {
2419 SI_REG, DI_REG,
2420 XMM6_REG, XMM7_REG,
2421 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2422 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2423 };
2424
2425 /* Define the structure for the machine field in struct function. */
2426
2427 struct GTY(()) stack_local_entry {
2428 unsigned short mode;
2429 unsigned short n;
2430 rtx rtl;
2431 struct stack_local_entry *next;
2432 };
2433
2434 /* Structure describing stack frame layout.
2435 Stack grows downward:
2436
2437 [arguments]
2438 <- ARG_POINTER
2439 saved pc
2440
2441 saved static chain if ix86_static_chain_on_stack
2442
2443 saved frame pointer if frame_pointer_needed
2444 <- HARD_FRAME_POINTER
2445 [saved regs]
2446 <- regs_save_offset
2447 [padding0]
2448
2449 [saved SSE regs]
2450 <- sse_regs_save_offset
2451 [padding1] |
2452 | <- FRAME_POINTER
2453 [va_arg registers] |
2454 |
2455 [frame] |
2456 |
2457 [padding2] | = to_allocate
2458 <- STACK_POINTER
2459 */
2460 struct ix86_frame
2461 {
2462 int nsseregs;
2463 int nregs;
2464 int va_arg_size;
2465 int red_zone_size;
2466 int outgoing_arguments_size;
2467
2468 /* The offsets relative to ARG_POINTER. */
2469 HOST_WIDE_INT frame_pointer_offset;
2470 HOST_WIDE_INT hard_frame_pointer_offset;
2471 HOST_WIDE_INT stack_pointer_offset;
2472 HOST_WIDE_INT hfp_save_offset;
2473 HOST_WIDE_INT reg_save_offset;
2474 HOST_WIDE_INT sse_reg_save_offset;
2475
2476 /* When save_regs_using_mov is set, emit prologue using
2477 move instead of push instructions. */
2478 bool save_regs_using_mov;
2479 };
2480
2481 /* Which CPU we are scheduling for.  */
2482 enum attr_cpu ix86_schedule;
2483
2484 /* Which CPU we are optimizing for.  */
2485 enum processor_type ix86_tune;
2486
2487 /* Which instruction set architecture to use. */
2488 enum processor_type ix86_arch;
2489
2490 /* True if processor has SSE prefetch instruction. */
2491 unsigned char x86_prefetch_sse;
2492
2493 /* -mstackrealign option */
2494 static const char ix86_force_align_arg_pointer_string[]
2495 = "force_align_arg_pointer";
2496
2497 static rtx (*ix86_gen_leave) (void);
2498 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2499 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2500 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2501 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2502 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2503 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2504 static rtx (*ix86_gen_clzero) (rtx);
2505 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2506 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2507 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2508 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2509 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2510 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2511
2512 /* Preferred alignment for stack boundary in bits. */
2513 unsigned int ix86_preferred_stack_boundary;
2514
2515 /* Alignment for incoming stack boundary in bits specified at
2516 command line. */
2517 static unsigned int ix86_user_incoming_stack_boundary;
2518
2519 /* Default alignment for incoming stack boundary in bits. */
2520 static unsigned int ix86_default_incoming_stack_boundary;
2521
2522 /* Alignment for incoming stack boundary in bits. */
2523 unsigned int ix86_incoming_stack_boundary;
2524
2525 /* Calling-ABI-specific va_list type nodes.  */
2526 static GTY(()) tree sysv_va_list_type_node;
2527 static GTY(()) tree ms_va_list_type_node;
2528
2529 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2530 char internal_label_prefix[16];
2531 int internal_label_prefix_len;
2532
2533 /* Fence to use after loop using movnt. */
2534 tree x86_mfence;
2535
2536 /* Register class used for passing a given 64-bit part of the argument.
2537 These represent the classes documented by the psABI, with the exception
2538 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2539 just uses SFmode or DFmode moves instead of DImode to avoid reformatting
2540 penalties.
2541 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2542 whenever possible (the upper half does contain padding). */
2543 enum x86_64_reg_class
2544 {
2545 X86_64_NO_CLASS,
2546 X86_64_INTEGER_CLASS,
2547 X86_64_INTEGERSI_CLASS,
2548 X86_64_SSE_CLASS,
2549 X86_64_SSESF_CLASS,
2550 X86_64_SSEDF_CLASS,
2551 X86_64_SSEUP_CLASS,
2552 X86_64_X87_CLASS,
2553 X86_64_X87UP_CLASS,
2554 X86_64_COMPLEX_X87_CLASS,
2555 X86_64_MEMORY_CLASS
2556 };
2557
2558 #define MAX_CLASSES 8
2559
2560 /* Table of constants used by fldpi, fldln2, etc.  */
2561 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2562 static bool ext_80387_constants_init = 0;
2563
2564 \f
2565 static struct machine_function * ix86_init_machine_status (void);
2566 static rtx ix86_function_value (const_tree, const_tree, bool);
2567 static bool ix86_function_value_regno_p (const unsigned int);
2568 static unsigned int ix86_function_arg_boundary (machine_mode,
2569 const_tree);
2570 static rtx ix86_static_chain (const_tree, bool);
2571 static int ix86_function_regparm (const_tree, const_tree);
2572 static void ix86_compute_frame_layout (struct ix86_frame *);
2573 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2574 rtx, rtx, int);
2575 static void ix86_add_new_builtins (HOST_WIDE_INT);
2576 static tree ix86_canonical_va_list_type (tree);
2577 static void predict_jump (int);
2578 static unsigned int split_stack_prologue_scratch_regno (void);
2579 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2580
2581 enum ix86_function_specific_strings
2582 {
2583 IX86_FUNCTION_SPECIFIC_ARCH,
2584 IX86_FUNCTION_SPECIFIC_TUNE,
2585 IX86_FUNCTION_SPECIFIC_MAX
2586 };
2587
2588 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2589 const char *, enum fpmath_unit, bool);
2590 static void ix86_function_specific_save (struct cl_target_option *,
2591 struct gcc_options *opts);
2592 static void ix86_function_specific_restore (struct gcc_options *opts,
2593 struct cl_target_option *);
2594 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2595 static void ix86_function_specific_print (FILE *, int,
2596 struct cl_target_option *);
2597 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2598 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2599 struct gcc_options *,
2600 struct gcc_options *,
2601 struct gcc_options *);
2602 static bool ix86_can_inline_p (tree, tree);
2603 static void ix86_set_current_function (tree);
2604 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2605
2606 static enum calling_abi ix86_function_abi (const_tree);
2607
2608 \f
2609 #ifndef SUBTARGET32_DEFAULT_CPU
2610 #define SUBTARGET32_DEFAULT_CPU "i386"
2611 #endif
2612
2613 /* Whether -mtune= or -march= were specified.  */
2614 static int ix86_tune_defaulted;
2615 static int ix86_arch_specified;
2616
2617 /* Vectorization library interface and handlers. */
2618 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
2619
2620 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
2621 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
2622
2623 /* Processor target table, indexed by processor number */
2624 struct ptt
2625 {
2626 const char *const name; /* processor name */
2627 const struct processor_costs *cost; /* Processor costs */
2628 const int align_loop; /* Default alignments. */
2629 const int align_loop_max_skip;
2630 const int align_jump;
2631 const int align_jump_max_skip;
2632 const int align_func;
2633 };
2634
2635 /* This table must be in sync with enum processor_type in i386.h. */
2636 static const struct ptt processor_target_table[PROCESSOR_max] =
2637 {
2638 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2639 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2640 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2641 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2642 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
2643 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2644 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2645 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2646 {"core2", &core_cost, 16, 10, 16, 10, 16},
2647 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2648 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2649 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2650 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2651 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2652 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2653 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
2654 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2655 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2656 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2657 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2658 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2659 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2660 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2661 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2662 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2663 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2664 {"znver1", &znver1_cost, 16, 10, 16, 7, 11},
2665 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2666 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2667 };
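/* A hedged sketch of how this table is used: rows are indexed by
   enum processor_type, so the active tuning row is
   processor_target_table[ix86_tune], and the option-override code takes
   the default alignments from it when the user has not overridden them.
   This is a simplified rendering only.  */
#if 0 /* Illustrative sketch; not compiled.  */
  const struct ptt *row = &processor_target_table[ix86_tune];
  if (align_functions == 0)
    align_functions = row->align_func;
  if (align_loops == 0)
    align_loops = row->align_loop;
#endif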
2668 \f
2669 static unsigned int
2670 rest_of_handle_insert_vzeroupper (void)
2671 {
2672 int i;
2673
2674 /* vzeroupper instructions are inserted immediately after reload to
2675 account for possible spills from 256bit registers. The pass
2676 reuses mode switching infrastructure by re-running mode insertion
2677 pass, so disable entities that have already been processed. */
2678 for (i = 0; i < MAX_386_ENTITIES; i++)
2679 ix86_optimize_mode_switching[i] = 0;
2680
2681 ix86_optimize_mode_switching[AVX_U128] = 1;
2682
2683 /* Call optimize_mode_switching. */
2684 g->get_passes ()->execute_pass_mode_switching ();
2685 return 0;
2686 }
2687
2688 /* Return true if INSN uses or defines a hard register.
2689 Hard register uses in a memory address are ignored.
2690 Clobbers and flags definitions are ignored.  */
2691
2692 static bool
2693 has_non_address_hard_reg (rtx_insn *insn)
2694 {
2695 df_ref ref;
2696 FOR_EACH_INSN_DEF (ref, insn)
2697 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
2698 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
2699 && DF_REF_REGNO (ref) != FLAGS_REG)
2700 return true;
2701
2702 FOR_EACH_INSN_USE (ref, insn)
2703 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
2704 return true;
2705
2706 return false;
2707 }
2708
2709 /* Check if comparison INSN may be transformed
2710 into a vector comparison.  Currently we transform
2711 only zero checks, which look like:
2712
2713 (set (reg:CCZ 17 flags)
2714 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2715 (subreg:SI (reg:DI x) 0))
2716 (const_int 0 [0]))) */
2717
2718 static bool
2719 convertible_comparison_p (rtx_insn *insn)
2720 {
2721 if (!TARGET_SSE4_1)
2722 return false;
2723
2724 rtx def_set = single_set (insn);
2725
2726 gcc_assert (def_set);
2727
2728 rtx src = SET_SRC (def_set);
2729 rtx dst = SET_DEST (def_set);
2730
2731 gcc_assert (GET_CODE (src) == COMPARE);
2732
2733 if (GET_CODE (dst) != REG
2734 || REGNO (dst) != FLAGS_REG
2735 || GET_MODE (dst) != CCZmode)
2736 return false;
2737
2738 rtx op1 = XEXP (src, 0);
2739 rtx op2 = XEXP (src, 1);
2740
2741 if (op2 != CONST0_RTX (GET_MODE (op2)))
2742 return false;
2743
2744 if (GET_CODE (op1) != IOR)
2745 return false;
2746
2747 op2 = XEXP (op1, 1);
2748 op1 = XEXP (op1, 0);
2749
2750 if (!SUBREG_P (op1)
2751 || !SUBREG_P (op2)
2752 || GET_MODE (op1) != SImode
2753 || GET_MODE (op2) != SImode
2754 || ((SUBREG_BYTE (op1) != 0
2755 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2756 && (SUBREG_BYTE (op2) != 0
2757 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
2758 return false;
2759
2760 op1 = SUBREG_REG (op1);
2761 op2 = SUBREG_REG (op2);
2762
2763 if (op1 != op2
2764 || !REG_P (op1)
2765 || GET_MODE (op1) != DImode)
2766 return false;
2767
2768 return true;
2769 }
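/* For context, a hedged source-level example of code that typically gives
   rise to the zero-check pattern handled above when compiling for a
   32-bit target: the two SImode halves of the DImode value are OR-ed
   together and compared against zero.  */
#if 0 /* Illustrative example; not part of GCC.  */
int
is_zero (long long x)
{
  return x == 0;
}
#endif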
2770
2771 /* Return true if INSN may be converted into a vector
2772 instruction.  */
2773
2774 static bool
2775 scalar_to_vector_candidate_p (rtx_insn *insn)
2776 {
2777 rtx def_set = single_set (insn);
2778
2779 if (!def_set)
2780 return false;
2781
2782 if (has_non_address_hard_reg (insn))
2783 return false;
2784
2785 rtx src = SET_SRC (def_set);
2786 rtx dst = SET_DEST (def_set);
2787
2788 if (GET_CODE (src) == COMPARE)
2789 return convertible_comparison_p (insn);
2790
2791 /* We are interested in DImode promotion only. */
2792 if (GET_MODE (src) != DImode
2793 || GET_MODE (dst) != DImode)
2794 return false;
2795
2796 if (!REG_P (dst) && !MEM_P (dst))
2797 return false;
2798
2799 switch (GET_CODE (src))
2800 {
2801 case PLUS:
2802 case MINUS:
2803 case IOR:
2804 case XOR:
2805 case AND:
2806 break;
2807
2808 case REG:
2809 return true;
2810
2811 case MEM:
2812 return REG_P (dst);
2813
2814 default:
2815 return false;
2816 }
2817
2818 if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)))
2819 return false;
2820
2821 if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
2822 return false;
2823
2824 if (GET_MODE (XEXP (src, 0)) != DImode
2825 || GET_MODE (XEXP (src, 1)) != DImode)
2826 return false;
2827
2828 return true;
2829 }
2830
2831 /* For a given bitmap of insn UIDs, scan all instructions and
2832 remove an insn from CANDIDATES if it has both convertible
2833 and non-convertible definitions.
2834 
2835 All insns in the bitmap are conversion candidates according to
2836 scalar_to_vector_candidate_p.  Currently this implies that all
2837 insns are single_set. */
2838
2839 static void
2840 remove_non_convertible_regs (bitmap candidates)
2841 {
2842 bitmap_iterator bi;
2843 unsigned id;
2844 bitmap regs = BITMAP_ALLOC (NULL);
2845
2846 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
2847 {
2848 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
2849 rtx reg = SET_DEST (def_set);
2850
2851 if (!REG_P (reg)
2852 || bitmap_bit_p (regs, REGNO (reg))
2853 || HARD_REGISTER_P (reg))
2854 continue;
2855
2856 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
2857 def;
2858 def = DF_REF_NEXT_REG (def))
2859 {
2860 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2861 {
2862 if (dump_file)
2863 fprintf (dump_file,
2864 "r%d has non convertible definition in insn %d\n",
2865 REGNO (reg), DF_REF_INSN_UID (def));
2866
2867 bitmap_set_bit (regs, REGNO (reg));
2868 break;
2869 }
2870 }
2871 }
2872
2873 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
2874 {
2875 for (df_ref def = DF_REG_DEF_CHAIN (id);
2876 def;
2877 def = DF_REF_NEXT_REG (def))
2878 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2879 {
2880 if (dump_file)
2881 fprintf (dump_file, "Removing insn %d from candidates list\n",
2882 DF_REF_INSN_UID (def));
2883
2884 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
2885 }
2886 }
2887
2888 BITMAP_FREE (regs);
2889 }
2890
2891 class scalar_chain
2892 {
2893 public:
2894 scalar_chain ();
2895 ~scalar_chain ();
2896
2897 static unsigned max_id;
2898
2899 /* ID of a chain. */
2900 unsigned int chain_id;
2901 /* A queue of instructions to be included into a chain. */
2902 bitmap queue;
2903 /* Instructions included into a chain. */
2904 bitmap insns;
2905 /* All registers defined by a chain. */
2906 bitmap defs;
2907 /* Registers used in both vector and scalar modes. */
2908 bitmap defs_conv;
2909
2910 void build (bitmap candidates, unsigned insn_uid);
2911 int compute_convert_gain ();
2912 int convert ();
2913
2914 private:
2915 void add_insn (bitmap candidates, unsigned insn_uid);
2916 void add_to_queue (unsigned insn_uid);
2917 void mark_dual_mode_def (df_ref def);
2918 void analyze_register_chain (bitmap candidates, df_ref ref);
2919 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
2920 void emit_conversion_insns (rtx insns, rtx_insn *pos);
2921 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
2922 void convert_insn (rtx_insn *insn);
2923 void convert_op (rtx *op, rtx_insn *insn);
2924 void convert_reg (unsigned regno);
2925 void make_vector_copies (unsigned regno);
2926 };
2927
2928 unsigned scalar_chain::max_id = 0;
2929
2930 /* Initialize new chain. */
2931
2932 scalar_chain::scalar_chain ()
2933 {
2934 chain_id = ++max_id;
2935
2936 if (dump_file)
2937 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
2938
2939 bitmap_obstack_initialize (NULL);
2940 insns = BITMAP_ALLOC (NULL);
2941 defs = BITMAP_ALLOC (NULL);
2942 defs_conv = BITMAP_ALLOC (NULL);
2943 queue = NULL;
2944 }
2945
2946 /* Free chain's data. */
2947
2948 scalar_chain::~scalar_chain ()
2949 {
2950 BITMAP_FREE (insns);
2951 BITMAP_FREE (defs);
2952 BITMAP_FREE (defs_conv);
2953 bitmap_obstack_release (NULL);
2954 }
2955
2956 /* Add instruction into the chain's queue. */
2957
2958 void
2959 scalar_chain::add_to_queue (unsigned insn_uid)
2960 {
2961 if (bitmap_bit_p (insns, insn_uid)
2962 || bitmap_bit_p (queue, insn_uid))
2963 return;
2964
2965 if (dump_file)
2966 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
2967 insn_uid, chain_id);
2968 bitmap_set_bit (queue, insn_uid);
2969 }
2970
2971 /* Mark register defined by DEF as requiring conversion. */
2972
2973 void
2974 scalar_chain::mark_dual_mode_def (df_ref def)
2975 {
2976 gcc_assert (DF_REF_REG_DEF_P (def));
2977
2978 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
2979 return;
2980
2981 if (dump_file)
2982 fprintf (dump_file,
2983 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
2984 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
2985
2986 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
2987 }
2988
2989 /* Check REF's chain to add new insns into a queue
2990 and find registers requiring conversion. */
2991
2992 void
2993 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
2994 {
2995 df_link *chain;
2996
2997 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
2998 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
2999 add_to_queue (DF_REF_INSN_UID (ref));
3000
3001 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
3002 {
3003 unsigned uid = DF_REF_INSN_UID (chain->ref);
3004
3005 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
3006 continue;
3007
3008 if (!DF_REF_REG_MEM_P (chain->ref))
3009 {
3010 if (bitmap_bit_p (insns, uid))
3011 continue;
3012
3013 if (bitmap_bit_p (candidates, uid))
3014 {
3015 add_to_queue (uid);
3016 continue;
3017 }
3018 }
3019
3020 if (DF_REF_REG_DEF_P (chain->ref))
3021 {
3022 if (dump_file)
3023 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
3024 DF_REF_REGNO (chain->ref), uid);
3025 mark_dual_mode_def (chain->ref);
3026 }
3027 else
3028 {
3029 if (dump_file)
3030 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
3031 DF_REF_REGNO (chain->ref), uid);
3032 mark_dual_mode_def (ref);
3033 }
3034 }
3035 }
3036
3037 /* Add instruction into a chain. */
3038
3039 void
3040 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
3041 {
3042 if (bitmap_bit_p (insns, insn_uid))
3043 return;
3044
3045 if (dump_file)
3046 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
3047
3048 bitmap_set_bit (insns, insn_uid);
3049
3050 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3051 rtx def_set = single_set (insn);
3052 if (def_set && REG_P (SET_DEST (def_set))
3053 && !HARD_REGISTER_P (SET_DEST (def_set)))
3054 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
3055
3056 df_ref ref;
3057 df_ref def;
3058 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3059 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
3060 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
3061 def;
3062 def = DF_REF_NEXT_REG (def))
3063 analyze_register_chain (candidates, def);
3064 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3065 if (!DF_REF_REG_MEM_P (ref))
3066 analyze_register_chain (candidates, ref);
3067 }
3068
3069 /* Build new chain starting from insn INSN_UID recursively
3070 adding all dependent uses and definitions. */
3071
3072 void
3073 scalar_chain::build (bitmap candidates, unsigned insn_uid)
3074 {
3075 queue = BITMAP_ALLOC (NULL);
3076 bitmap_set_bit (queue, insn_uid);
3077
3078 if (dump_file)
3079 fprintf (dump_file, "Building chain #%d...\n", chain_id);
3080
3081 while (!bitmap_empty_p (queue))
3082 {
3083 insn_uid = bitmap_first_set_bit (queue);
3084 bitmap_clear_bit (queue, insn_uid);
3085 bitmap_clear_bit (candidates, insn_uid);
3086 add_insn (candidates, insn_uid);
3087 }
3088
3089 if (dump_file)
3090 {
3091 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
3092 fprintf (dump_file, " insns: ");
3093 dump_bitmap (dump_file, insns);
3094 if (!bitmap_empty_p (defs_conv))
3095 {
3096 bitmap_iterator bi;
3097 unsigned id;
3098 const char *comma = "";
3099 fprintf (dump_file, " defs to convert: ");
3100 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
3101 {
3102 fprintf (dump_file, "%sr%d", comma, id);
3103 comma = ", ";
3104 }
3105 fprintf (dump_file, "\n");
3106 }
3107 }
3108
3109 BITMAP_FREE (queue);
3110 }
3111
3112 /* Compute a gain for chain conversion. */
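/* Roughly, the gain computed below is

     gain = sum over chain insns of (scalar cost - vector cost)
            - sum over dual-mode regs of DEF_COUNT * mmxsse_to_integer

   where the individual costs come from ix86_cost (informal summary of
   the code below). */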
3113
3114 int
3115 scalar_chain::compute_convert_gain ()
3116 {
3117 bitmap_iterator bi;
3118 unsigned insn_uid;
3119 int gain = 0;
3120 int cost = 0;
3121
3122 if (dump_file)
3123 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
3124
3125 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
3126 {
3127 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3128 rtx def_set = single_set (insn);
3129 rtx src = SET_SRC (def_set);
3130 rtx dst = SET_DEST (def_set);
3131
3132 if (REG_P (src) && REG_P (dst))
3133 gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
3134 else if (REG_P (src) && MEM_P (dst))
3135 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3136 else if (MEM_P (src) && REG_P (dst))
3137 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
3138 else if (GET_CODE (src) == PLUS
3139 || GET_CODE (src) == MINUS
3140 || GET_CODE (src) == IOR
3141 || GET_CODE (src) == XOR
3142 || GET_CODE (src) == AND)
3143 gain += ix86_cost->add;
3144 else if (GET_CODE (src) == COMPARE)
3145 {
3146 /* Assume comparison cost is the same. */
3147 }
3148 else
3149 gcc_unreachable ();
3150 }
3151
3152 if (dump_file)
3153 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
3154
3155 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
3156 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
3157
3158 if (dump_file)
3159 fprintf (dump_file, " Register conversion cost: %d\n", cost);
3160
3161 gain -= cost;
3162
3163 if (dump_file)
3164 fprintf (dump_file, " Total gain: %d\n", gain);
3165
3166 return gain;
3167 }
3168
3169 /* Replace REG in X with a V2DI subreg of NEW_REG. */
3170
3171 rtx
3172 scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
3173 {
3174 if (x == reg)
3175 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
3176
3177 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
3178 int i, j;
3179 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3180 {
3181 if (fmt[i] == 'e')
3182 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
3183 else if (fmt[i] == 'E')
3184 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3185 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
3186 reg, new_reg);
3187 }
3188
3189 return x;
3190 }
3191
3192 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
3193
3194 void
3195 scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
3196 {
3197 replace_with_subreg (single_set (insn), reg, new_reg);
3198 }
3199
3200 /* Insert generated conversion instruction sequence INSNS
3201 after instruction AFTER. A new BB may be required in case
3202 the instruction has an EH region attached. */
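/* In particular, if AFTER ends its basic block (e.g. it can throw),
   the sequence is placed at the head of the new block created by
   splitting the fall-through edge rather than directly after AFTER. */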
3203
3204 void
3205 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
3206 {
3207 if (!control_flow_insn_p (after))
3208 {
3209 emit_insn_after (insns, after);
3210 return;
3211 }
3212
3213 basic_block bb = BLOCK_FOR_INSN (after);
3214 edge e = find_fallthru_edge (bb->succs);
3215 gcc_assert (e);
3216
3217 basic_block new_bb = split_edge (e);
3218 emit_insn_after (insns, BB_HEAD (new_bb));
3219 }
3220
3221 /* Make vector copies for all definitions of register REGNO
3222 and replace its uses within the chain. */
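/* The copy below is made in one of three ways, roughly: with SSE4.1
   the two halves are inserted with movd + pinsrd, with inter-unit
   moves enabled they are combined with two movd's and a punpckldq,
   and otherwise the value goes through a stack temporary (informal
   summary; see the TARGET_* branches below). */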
3223
3224 void
3225 scalar_chain::make_vector_copies (unsigned regno)
3226 {
3227 rtx reg = regno_reg_rtx[regno];
3228 rtx vreg = gen_reg_rtx (DImode);
3229 df_ref ref;
3230
3231 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3232 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3233 {
3234 rtx_insn *insn = DF_REF_INSN (ref);
3235
3236 start_sequence ();
3237 if (TARGET_SSE4_1)
3238 {
3239 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3240 CONST0_RTX (V4SImode),
3241 gen_rtx_SUBREG (SImode, reg, 0)));
3242 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
3243 gen_rtx_SUBREG (V4SImode, vreg, 0),
3244 gen_rtx_SUBREG (SImode, reg, 4),
3245 GEN_INT (2)));
3246 }
3247 else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
3248 {
3249 rtx tmp = gen_reg_rtx (DImode);
3250 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3251 CONST0_RTX (V4SImode),
3252 gen_rtx_SUBREG (SImode, reg, 0)));
3253 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
3254 CONST0_RTX (V4SImode),
3255 gen_rtx_SUBREG (SImode, reg, 4)));
3256 emit_insn (gen_vec_interleave_lowv4si
3257 (gen_rtx_SUBREG (V4SImode, vreg, 0),
3258 gen_rtx_SUBREG (V4SImode, vreg, 0),
3259 gen_rtx_SUBREG (V4SImode, tmp, 0)));
3260 }
3261 else
3262 {
3263 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3264 emit_move_insn (adjust_address (tmp, SImode, 0),
3265 gen_rtx_SUBREG (SImode, reg, 0));
3266 emit_move_insn (adjust_address (tmp, SImode, 4),
3267 gen_rtx_SUBREG (SImode, reg, 4));
3268 emit_move_insn (vreg, tmp);
3269 }
3270 emit_conversion_insns (get_insns (), insn);
3271 end_sequence ();
3272
3273 if (dump_file)
3274 fprintf (dump_file,
3275 " Copied r%d to a vector register r%d for insn %d\n",
3276 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3277 }
3278
3279 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3280 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3281 {
3282 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
3283
3284 if (dump_file)
3285 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
3286 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3287 }
3288 }
3289
3290 /* Convert all definitions of register REGNO
3291 and fix its uses. Scalar copies may be created
3292 in case the register is used in a non-convertible insn. */
3293
3294 void
3295 scalar_chain::convert_reg (unsigned regno)
3296 {
3297 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
3298 rtx reg = regno_reg_rtx[regno];
3299 rtx scopy = NULL_RTX;
3300 df_ref ref;
3301 bitmap conv;
3302
3303 conv = BITMAP_ALLOC (NULL);
3304 bitmap_copy (conv, insns);
3305
3306 if (scalar_copy)
3307 scopy = gen_reg_rtx (DImode);
3308
3309 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3310 {
3311 rtx_insn *insn = DF_REF_INSN (ref);
3312 rtx def_set = single_set (insn);
3313 rtx src = SET_SRC (def_set);
3314 rtx reg = DF_REF_REG (ref);
3315
3316 if (!MEM_P (src))
3317 {
3318 replace_with_subreg_in_insn (insn, reg, reg);
3319 bitmap_clear_bit (conv, INSN_UID (insn));
3320 }
3321
3322 if (scalar_copy)
3323 {
3324 rtx vcopy = gen_reg_rtx (V2DImode);
3325
3326 start_sequence ();
3327 if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
3328 {
3329 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
3330 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3331 gen_rtx_SUBREG (SImode, vcopy, 0));
3332 emit_move_insn (vcopy,
3333 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
3334 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3335 gen_rtx_SUBREG (SImode, vcopy, 0));
3336 }
3337 else
3338 {
3339 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3340 emit_move_insn (tmp, reg);
3341 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3342 adjust_address (tmp, SImode, 0));
3343 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3344 adjust_address (tmp, SImode, 4));
3345 }
3346 emit_conversion_insns (get_insns (), insn);
3347 end_sequence ();
3348
3349 if (dump_file)
3350 fprintf (dump_file,
3351 " Copied r%d to a scalar register r%d for insn %d\n",
3352 regno, REGNO (scopy), INSN_UID (insn));
3353 }
3354 }
3355
3356 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3357 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3358 {
3359 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
3360 {
3361 rtx def_set = single_set (DF_REF_INSN (ref));
3362 if (!MEM_P (SET_DEST (def_set))
3363 || !REG_P (SET_SRC (def_set)))
3364 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
3365 bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
3366 }
3367 }
3368 else if (NONDEBUG_INSN_P (DF_REF_INSN (ref)))
3369 {
3370 replace_rtx (DF_REF_INSN (ref), reg, scopy);
3371 df_insn_rescan (DF_REF_INSN (ref));
3372 }
3373
3374 BITMAP_FREE (conv);
3375 }
3376
3377 /* Convert operand OP in INSN. All register uses
3378 are converted during register conversion.
3379 Therefore we only need to handle memory operands here. */
3380
3381 void
3382 scalar_chain::convert_op (rtx *op, rtx_insn *insn)
3383 {
3384 *op = copy_rtx_if_shared (*op);
3385
3386 if (MEM_P (*op))
3387 {
3388 rtx tmp = gen_reg_rtx (DImode);
3389
3390 emit_insn_before (gen_move_insn (tmp, *op), insn);
3391 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
3392
3393 if (dump_file)
3394 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
3395 INSN_UID (insn), REGNO (tmp));
3396 }
3397 else
3398 {
3399 gcc_assert (SUBREG_P (*op));
3400 gcc_assert (GET_MODE (*op) == V2DImode);
3401 }
3402 }
3403
3404 /* Convert INSN to vector mode. */
3405
3406 void
3407 scalar_chain::convert_insn (rtx_insn *insn)
3408 {
3409 rtx def_set = single_set (insn);
3410 rtx src = SET_SRC (def_set);
3411 rtx dst = SET_DEST (def_set);
3412 rtx subreg;
3413
3414 if (MEM_P (dst) && !REG_P (src))
3415 {
3416 /* Vector instructions cannot store this source directly to
3417 memory, so compute into a temporary register first. */
3418 rtx tmp = gen_reg_rtx (DImode);
3419 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
3420 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
3421 }
3422
3423 switch (GET_CODE (src))
3424 {
3425 case PLUS:
3426 case MINUS:
3427 case IOR:
3428 case XOR:
3429 case AND:
3430 convert_op (&XEXP (src, 0), insn);
3431 convert_op (&XEXP (src, 1), insn);
3432 PUT_MODE (src, V2DImode);
3433 break;
3434
3435 case MEM:
3436 if (!REG_P (dst))
3437 convert_op (&src, insn);
3438 break;
3439
3440 case REG:
3441 break;
3442
3443 case SUBREG:
3444 gcc_assert (GET_MODE (src) == V2DImode);
3445 break;
3446
3447 case COMPARE:
3448 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
3449
3450 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
3451 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
3452
3453 if (REG_P (src))
3454 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
3455 else
3456 subreg = copy_rtx_if_shared (src);
3457 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
3458 copy_rtx_if_shared (subreg),
3459 copy_rtx_if_shared (subreg)),
3460 insn);
3461 dst = gen_rtx_REG (CCmode, FLAGS_REG);
3462 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
3463 copy_rtx_if_shared (src)),
3464 UNSPEC_PTEST);
3465 break;
3466
3467 default:
3468 gcc_unreachable ();
3469 }
3470
3471 SET_SRC (def_set) = src;
3472 SET_DEST (def_set) = dst;
3473
3474 /* Drop possible dead definitions. */
3475 PATTERN (insn) = def_set;
3476
3477 INSN_CODE (insn) = -1;
3478 recog_memoized (insn);
3479 df_insn_rescan (insn);
3480 }
3481
3482 /* Convert whole chain creating required register
3483 conversions and copies. */
3484
3485 int
3486 scalar_chain::convert ()
3487 {
3488 bitmap_iterator bi;
3489 unsigned id;
3490 int converted_insns = 0;
3491
3492 if (!dbg_cnt (stv_conversion))
3493 return 0;
3494
3495 if (dump_file)
3496 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
3497
3498 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
3499 convert_reg (id);
3500
3501 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
3502 make_vector_copies (id);
3503
3504 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
3505 {
3506 convert_insn (DF_INSN_UID_GET (id)->insn);
3507 converted_insns++;
3508 }
3509
3510 return converted_insns;
3511 }
3512
3513 /* Main STV pass function. Find and convert scalar
3514 instructions into vector mode when profitable. */
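/* Informally, the pass proceeds as follows: compute def-use chains,
   mark candidates with scalar_to_vector_candidate_p, filter them with
   remove_non_convertible_regs, group the rest into scalar_chain
   objects, and convert each chain whose estimated gain is positive. */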
3515
3516 static unsigned int
3517 convert_scalars_to_vector ()
3518 {
3519 basic_block bb;
3520 bitmap candidates;
3521 int converted_insns = 0;
3522
3523 bitmap_obstack_initialize (NULL);
3524 candidates = BITMAP_ALLOC (NULL);
3525
3526 calculate_dominance_info (CDI_DOMINATORS);
3527 df_set_flags (DF_DEFER_INSN_RESCAN);
3528 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
3529 df_md_add_problem ();
3530 df_analyze ();
3531
3532 /* Find all instructions we want to convert into vector mode. */
3533 if (dump_file)
3534 fprintf (dump_file, "Searching for mode conversion candidates...\n");
3535
3536 FOR_EACH_BB_FN (bb, cfun)
3537 {
3538 rtx_insn *insn;
3539 FOR_BB_INSNS (bb, insn)
3540 if (scalar_to_vector_candidate_p (insn))
3541 {
3542 if (dump_file)
3543 fprintf (dump_file, " insn %d is marked as a candidate\n",
3544 INSN_UID (insn));
3545
3546 bitmap_set_bit (candidates, INSN_UID (insn));
3547 }
3548 }
3549
3550 remove_non_convertible_regs (candidates);
3551
3552 if (bitmap_empty_p (candidates))
3553 if (dump_file)
3554 fprintf (dump_file, "There are no candidates for optimization.\n");
3555
3556 while (!bitmap_empty_p (candidates))
3557 {
3558 unsigned uid = bitmap_first_set_bit (candidates);
3559 scalar_chain chain;
3560
3561 /* Find instructions chain we want to convert to vector mode.
3562 Check all uses and definitions to estimate all required
3563 conversions. */
3564 chain.build (candidates, uid);
3565
3566 if (chain.compute_convert_gain () > 0)
3567 converted_insns += chain.convert ();
3568 else
3569 if (dump_file)
3570 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
3571 chain.chain_id);
3572 }
3573
3574 if (dump_file)
3575 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
3576
3577 BITMAP_FREE (candidates);
3578 bitmap_obstack_release (NULL);
3579 df_process_deferred_rescans ();
3580
3581 /* Conversion means we may have 128-bit register spills/fills,
3582 which require an aligned stack. */
3583 if (converted_insns)
3584 {
3585 if (crtl->stack_alignment_needed < 128)
3586 crtl->stack_alignment_needed = 128;
3587 if (crtl->stack_alignment_estimated < 128)
3588 crtl->stack_alignment_estimated = 128;
3589 }
3590
3591 return 0;
3592 }
3593
3594 namespace {
3595
3596 const pass_data pass_data_insert_vzeroupper =
3597 {
3598 RTL_PASS, /* type */
3599 "vzeroupper", /* name */
3600 OPTGROUP_NONE, /* optinfo_flags */
3601 TV_NONE, /* tv_id */
3602 0, /* properties_required */
3603 0, /* properties_provided */
3604 0, /* properties_destroyed */
3605 0, /* todo_flags_start */
3606 TODO_df_finish, /* todo_flags_finish */
3607 };
3608
3609 class pass_insert_vzeroupper : public rtl_opt_pass
3610 {
3611 public:
3612 pass_insert_vzeroupper(gcc::context *ctxt)
3613 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
3614 {}
3615
3616 /* opt_pass methods: */
3617 virtual bool gate (function *)
3618 {
3619 return TARGET_AVX && !TARGET_AVX512F
3620 && TARGET_VZEROUPPER && flag_expensive_optimizations
3621 && !optimize_size;
3622 }
3623
3624 virtual unsigned int execute (function *)
3625 {
3626 return rest_of_handle_insert_vzeroupper ();
3627 }
3628
3629 }; // class pass_insert_vzeroupper
3630
3631 const pass_data pass_data_stv =
3632 {
3633 RTL_PASS, /* type */
3634 "stv", /* name */
3635 OPTGROUP_NONE, /* optinfo_flags */
3636 TV_NONE, /* tv_id */
3637 0, /* properties_required */
3638 0, /* properties_provided */
3639 0, /* properties_destroyed */
3640 0, /* todo_flags_start */
3641 TODO_df_finish, /* todo_flags_finish */
3642 };
3643
3644 class pass_stv : public rtl_opt_pass
3645 {
3646 public:
3647 pass_stv (gcc::context *ctxt)
3648 : rtl_opt_pass (pass_data_stv, ctxt)
3649 {}
3650
3651 /* opt_pass methods: */
3652 virtual bool gate (function *)
3653 {
3654 return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
3655 }
3656
3657 virtual unsigned int execute (function *)
3658 {
3659 return convert_scalars_to_vector ();
3660 }
3661
3662 }; // class pass_stv
3663
3664 } // anon namespace
3665
3666 rtl_opt_pass *
3667 make_pass_insert_vzeroupper (gcc::context *ctxt)
3668 {
3669 return new pass_insert_vzeroupper (ctxt);
3670 }
3671
3672 rtl_opt_pass *
3673 make_pass_stv (gcc::context *ctxt)
3674 {
3675 return new pass_stv (ctxt);
3676 }
3677
3678 /* Return true if a red-zone is in use. */
3679
3680 static inline bool
3681 ix86_using_red_zone (void)
3682 {
3683 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
3684 }
3685 \f
3686 /* Return a string that documents the current -m options. The caller is
3687 responsible for freeing the string. */
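/* For example, with -march=haswell on a 64-bit target the result
   looks roughly like
   "-march=haswell -mtune=haswell -m64 -mfma -mavx2 ... -mfpmath=sse",
   with a backslash-newline inserted about every 70 columns when
   ADD_NL_P is set (illustrative output, not an exact listing). */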
3688
3689 static char *
3690 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
3691 const char *tune, enum fpmath_unit fpmath,
3692 bool add_nl_p)
3693 {
3694 struct ix86_target_opts
3695 {
3696 const char *option; /* option string */
3697 HOST_WIDE_INT mask; /* isa mask options */
3698 };
3699
3700 /* This table is ordered so that options like -msse4.2 that imply
3701 preceding options will match those first. */
3702 static struct ix86_target_opts isa_opts[] =
3703 {
3704 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3705 { "-mfma", OPTION_MASK_ISA_FMA },
3706 { "-mxop", OPTION_MASK_ISA_XOP },
3707 { "-mlwp", OPTION_MASK_ISA_LWP },
3708 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
3709 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
3710 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
3711 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
3712 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
3713 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
3714 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
3715 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
3716 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
3717 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3718 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3719 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3720 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3721 { "-msse3", OPTION_MASK_ISA_SSE3 },
3722 { "-msse2", OPTION_MASK_ISA_SSE2 },
3723 { "-msse", OPTION_MASK_ISA_SSE },
3724 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3725 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3726 { "-mmmx", OPTION_MASK_ISA_MMX },
3727 { "-mabm", OPTION_MASK_ISA_ABM },
3728 { "-mbmi", OPTION_MASK_ISA_BMI },
3729 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
3730 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
3731 { "-mhle", OPTION_MASK_ISA_HLE },
3732 { "-mfxsr", OPTION_MASK_ISA_FXSR },
3733 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
3734 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
3735 { "-madx", OPTION_MASK_ISA_ADX },
3736 { "-mtbm", OPTION_MASK_ISA_TBM },
3737 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3738 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3739 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3740 { "-maes", OPTION_MASK_ISA_AES },
3741 { "-msha", OPTION_MASK_ISA_SHA },
3742 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3743 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3744 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3745 { "-mf16c", OPTION_MASK_ISA_F16C },
3746 { "-mrtm", OPTION_MASK_ISA_RTM },
3747 { "-mxsave", OPTION_MASK_ISA_XSAVE },
3748 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
3749 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
3750 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
3751 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
3752 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
3753 { "-mmpx", OPTION_MASK_ISA_MPX },
3754 { "-mclwb", OPTION_MASK_ISA_CLWB },
3755 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
3756 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
3757 { "-mclzero", OPTION_MASK_ISA_CLZERO },
3758 { "-mpku", OPTION_MASK_ISA_PKU },
3759 };
3760
3761 /* Flag options. */
3762 static struct ix86_target_opts flag_opts[] =
3763 {
3764 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3765 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
3766 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
3767 { "-m80387", MASK_80387 },
3768 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3769 { "-malign-double", MASK_ALIGN_DOUBLE },
3770 { "-mcld", MASK_CLD },
3771 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3772 { "-mieee-fp", MASK_IEEE_FP },
3773 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3774 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3775 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3776 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3777 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3778 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3779 { "-mno-red-zone", MASK_NO_RED_ZONE },
3780 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3781 { "-mrecip", MASK_RECIP },
3782 { "-mrtd", MASK_RTD },
3783 { "-msseregparm", MASK_SSEREGPARM },
3784 { "-mstack-arg-probe", MASK_STACK_PROBE },
3785 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3786 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3787 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3788 { "-mvzeroupper", MASK_VZEROUPPER },
3789 { "-mstv", MASK_STV},
3790 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3791 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3792 { "-mprefer-avx128", MASK_PREFER_AVX128},
3793 };
3794
3795 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3796
3797 char isa_other[40];
3798 char target_other[40];
3799 unsigned num = 0;
3800 unsigned i, j;
3801 char *ret;
3802 char *ptr;
3803 size_t len;
3804 size_t line_len;
3805 size_t sep_len;
3806 const char *abi;
3807
3808 memset (opts, '\0', sizeof (opts));
3809
3810 /* Add -march= option. */
3811 if (arch)
3812 {
3813 opts[num][0] = "-march=";
3814 opts[num++][1] = arch;
3815 }
3816
3817 /* Add -mtune= option. */
3818 if (tune)
3819 {
3820 opts[num][0] = "-mtune=";
3821 opts[num++][1] = tune;
3822 }
3823
3824 /* Add -m32/-m64/-mx32. */
3825 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
3826 {
3827 if ((isa & OPTION_MASK_ABI_64) != 0)
3828 abi = "-m64";
3829 else
3830 abi = "-mx32";
3831 isa &= ~ (OPTION_MASK_ISA_64BIT
3832 | OPTION_MASK_ABI_64
3833 | OPTION_MASK_ABI_X32);
3834 }
3835 else
3836 abi = "-m32";
3837 opts[num++][0] = abi;
3838
3839 /* Pick out the options in isa options. */
3840 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3841 {
3842 if ((isa & isa_opts[i].mask) != 0)
3843 {
3844 opts[num++][0] = isa_opts[i].option;
3845 isa &= ~ isa_opts[i].mask;
3846 }
3847 }
3848
3849 if (isa && add_nl_p)
3850 {
3851 opts[num++][0] = isa_other;
3852 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
3853 isa);
3854 }
3855
3856 /* Add flag options. */
3857 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3858 {
3859 if ((flags & flag_opts[i].mask) != 0)
3860 {
3861 opts[num++][0] = flag_opts[i].option;
3862 flags &= ~ flag_opts[i].mask;
3863 }
3864 }
3865
3866 if (flags && add_nl_p)
3867 {
3868 opts[num++][0] = target_other;
3869 sprintf (target_other, "(other flags: %#x)", flags);
3870 }
3871
3872 /* Add -fpmath= option. */
3873 if (fpmath)
3874 {
3875 opts[num][0] = "-mfpmath=";
3876 switch ((int) fpmath)
3877 {
3878 case FPMATH_387:
3879 opts[num++][1] = "387";
3880 break;
3881
3882 case FPMATH_SSE:
3883 opts[num++][1] = "sse";
3884 break;
3885
3886 case FPMATH_387 | FPMATH_SSE:
3887 opts[num++][1] = "sse+387";
3888 break;
3889
3890 default:
3891 gcc_unreachable ();
3892 }
3893 }
3894
3895 /* Any options? */
3896 if (num == 0)
3897 return NULL;
3898
3899 gcc_assert (num < ARRAY_SIZE (opts));
3900
3901 /* Size the string. */
3902 len = 0;
3903 sep_len = (add_nl_p) ? 3 : 1;
3904 for (i = 0; i < num; i++)
3905 {
3906 len += sep_len;
3907 for (j = 0; j < 2; j++)
3908 if (opts[i][j])
3909 len += strlen (opts[i][j]);
3910 }
3911
3912 /* Build the string. */
3913 ret = ptr = (char *) xmalloc (len);
3914 line_len = 0;
3915
3916 for (i = 0; i < num; i++)
3917 {
3918 size_t len2[2];
3919
3920 for (j = 0; j < 2; j++)
3921 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3922
3923 if (i != 0)
3924 {
3925 *ptr++ = ' ';
3926 line_len++;
3927
3928 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3929 {
3930 *ptr++ = '\\';
3931 *ptr++ = '\n';
3932 line_len = 0;
3933 }
3934 }
3935
3936 for (j = 0; j < 2; j++)
3937 if (opts[i][j])
3938 {
3939 memcpy (ptr, opts[i][j], len2[j]);
3940 ptr += len2[j];
3941 line_len += len2[j];
3942 }
3943 }
3944
3945 *ptr = '\0';
3946 gcc_assert (ret + len >= ptr);
3947
3948 return ret;
3949 }
3950
3951 /* Return true if profiling code should be emitted before the
3952 prologue, and false otherwise.
3953 Note: for x86 with "hotfix" this case is sorried. */
3954 static bool
3955 ix86_profile_before_prologue (void)
3956 {
3957 return flag_fentry != 0;
3958 }
3959
3960 /* Function that is callable from the debugger to print the current
3961 options. */
3962 void ATTRIBUTE_UNUSED
3963 ix86_debug_options (void)
3964 {
3965 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3966 ix86_arch_string, ix86_tune_string,
3967 ix86_fpmath, true);
3968
3969 if (opts)
3970 {
3971 fprintf (stderr, "%s\n\n", opts);
3972 free (opts);
3973 }
3974 else
3975 fputs ("<no options>\n\n", stderr);
3976
3977 return;
3978 }
3979
3980 /* Return true if T is one of the bytes we should avoid with
3981 -fmitigate-rop. */
3982
3983 static bool
3984 ix86_rop_should_change_byte_p (int t)
3985 {
3986 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3987 }
3988
3989 static const char *stringop_alg_names[] = {
3990 #define DEF_ENUM
3991 #define DEF_ALG(alg, name) #name,
3992 #include "stringop.def"
3993 #undef DEF_ENUM
3994 #undef DEF_ALG
3995 };
3996
3997 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3998 The string is of the following form (or comma separated list of it):
3999
4000 strategy_alg:max_size:[align|noalign]
4001
4002 where the full size range for the strategy is either [0, max_size] or
4003 [min_size, max_size], in which min_size is the max_size + 1 of the
4004 preceding range. The last size range must have max_size == -1.
4005
4006 Examples:
4007
4008 1.
4009 -mmemcpy-strategy=libcall:-1:noalign
4010
4011 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
4012
4013
4014 2.
4015 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
4016
4017 This is to tell the compiler to use the following strategy for memset
4018 1) when the expected size is between [1, 16], use rep_8byte strategy;
4019 2) when the size is between [17, 2048], use vector_loop;
4020 3) when the size is > 2048, use libcall. */
4021
4022 struct stringop_size_range
4023 {
4024 int max;
4025 stringop_alg alg;
4026 bool noalign;
4027 };
4028
4029 static void
4030 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
4031 {
4032 const struct stringop_algs *default_algs;
4033 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
4034 char *curr_range_str, *next_range_str;
4035 int i = 0, n = 0;
4036
4037 if (is_memset)
4038 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
4039 else
4040 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
4041
4042 curr_range_str = strategy_str;
4043
4044 do
4045 {
4046 int maxs;
4047 char alg_name[128];
4048 char align[16];
4049 next_range_str = strchr (curr_range_str, ',');
4050 if (next_range_str)
4051 *next_range_str++ = '\0';
4052
4053 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
4054 alg_name, &maxs, align))
4055 {
4056 error ("wrong arg %s to option %s", curr_range_str,
4057 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4058 return;
4059 }
4060
4061 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
4062 {
4063 error ("size ranges of option %s should be increasing",
4064 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4065 return;
4066 }
4067
4068 for (i = 0; i < last_alg; i++)
4069 if (!strcmp (alg_name, stringop_alg_names[i]))
4070 break;
4071
4072 if (i == last_alg)
4073 {
4074 error ("wrong stringop strategy name %s specified for option %s",
4075 alg_name,
4076 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4077 return;
4078 }
4079
4080 if ((stringop_alg) i == rep_prefix_8_byte
4081 && !TARGET_64BIT)
4082 {
4083 /* rep; movq isn't available in 32-bit code. */
4084 error ("stringop strategy name %s specified for option %s "
4085 "not supported for 32-bit code",
4086 alg_name,
4087 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4088 return;
4089 }
4090
4091 input_ranges[n].max = maxs;
4092 input_ranges[n].alg = (stringop_alg) i;
4093 if (!strcmp (align, "align"))
4094 input_ranges[n].noalign = false;
4095 else if (!strcmp (align, "noalign"))
4096 input_ranges[n].noalign = true;
4097 else
4098 {
4099 error ("unknown alignment %s specified for option %s",
4100 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4101 return;
4102 }
4103 n++;
4104 curr_range_str = next_range_str;
4105 }
4106 while (curr_range_str);
4107
4108 if (input_ranges[n - 1].max != -1)
4109 {
4110 error ("the max value for the last size range should be -1"
4111 " for option %s",
4112 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4113 return;
4114 }
4115
4116 if (n > MAX_STRINGOP_ALGS)
4117 {
4118 error ("too many size ranges specified in option %s",
4119 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4120 return;
4121 }
4122
4123 /* Now override the default algs array. */
4124 for (i = 0; i < n; i++)
4125 {
4126 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
4127 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
4128 = input_ranges[i].alg;
4129 *const_cast<int *>(&default_algs->size[i].noalign)
4130 = input_ranges[i].noalign;
4131 }
4132 }
4133
4134 \f
4135 /* Parse the -mtune-ctrl= option. When DUMP is true,
4136 print the features that are explicitly set. */
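/* For example, -mtune-ctrl=use_leave,^avx128_optimal would explicitly
   set the first feature and clear the second; unrecognized names are
   reported with an error below (feature names here are illustrative,
   see x86-tune.def for the actual list). */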
4137
4138 static void
4139 parse_mtune_ctrl_str (bool dump)
4140 {
4141 if (!ix86_tune_ctrl_string)
4142 return;
4143
4144 char *next_feature_string = NULL;
4145 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
4146 char *orig = curr_feature_string;
4147 int i;
4148 do
4149 {
4150 bool clear = false;
4151
4152 next_feature_string = strchr (curr_feature_string, ',');
4153 if (next_feature_string)
4154 *next_feature_string++ = '\0';
4155 if (*curr_feature_string == '^')
4156 {
4157 curr_feature_string++;
4158 clear = true;
4159 }
4160 for (i = 0; i < X86_TUNE_LAST; i++)
4161 {
4162 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
4163 {
4164 ix86_tune_features[i] = !clear;
4165 if (dump)
4166 fprintf (stderr, "Explicitly %s feature %s\n",
4167 clear ? "clear" : "set", ix86_tune_feature_names[i]);
4168 break;
4169 }
4170 }
4171 if (i == X86_TUNE_LAST)
4172 error ("Unknown parameter to option -mtune-ctrl: %s",
4173 clear ? curr_feature_string - 1 : curr_feature_string);
4174 curr_feature_string = next_feature_string;
4175 }
4176 while (curr_feature_string);
4177 free (orig);
4178 }
4179
4180 /* Helper function to set ix86_tune_features. IX86_TUNE is the
4181 processor type. */
4182
4183 static void
4184 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
4185 {
4186 unsigned int ix86_tune_mask = 1u << ix86_tune;
4187 int i;
4188
4189 for (i = 0; i < X86_TUNE_LAST; ++i)
4190 {
4191 if (ix86_tune_no_default)
4192 ix86_tune_features[i] = 0;
4193 else
4194 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4195 }
4196
4197 if (dump)
4198 {
4199 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
4200 for (i = 0; i < X86_TUNE_LAST; i++)
4201 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
4202 ix86_tune_features[i] ? "on" : "off");
4203 }
4204
4205 parse_mtune_ctrl_str (dump);
4206 }
4207
4208
4209 /* Default align_* from the processor table. */
4210
4211 static void
4212 ix86_default_align (struct gcc_options *opts)
4213 {
4214 if (opts->x_align_loops == 0)
4215 {
4216 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
4217 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
4218 }
4219 if (opts->x_align_jumps == 0)
4220 {
4221 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
4222 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
4223 }
4224 if (opts->x_align_functions == 0)
4225 {
4226 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
4227 }
4228 }
4229
4230 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
4231
4232 static void
4233 ix86_override_options_after_change (void)
4234 {
4235 ix86_default_align (&global_options);
4236 }
4237
4238 /* Override various settings based on options. If MAIN_ARGS_P, the
4239 options are from the command line, otherwise they are from
4240 attributes. */
4241
4242 static void
4243 ix86_option_override_internal (bool main_args_p,
4244 struct gcc_options *opts,
4245 struct gcc_options *opts_set)
4246 {
4247 int i;
4248 unsigned int ix86_arch_mask;
4249 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
4250 const char *prefix;
4251 const char *suffix;
4252 const char *sw;
4253
4254 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
4255 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
4256 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
4257 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
4258 #define PTA_AES (HOST_WIDE_INT_1 << 4)
4259 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
4260 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
4261 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
4262 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
4263 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
4264 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
4265 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
4266 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
4267 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
4268 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
4269 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
4270 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
4271 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
4272 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
4273 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
4274 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
4275 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
4276 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
4277 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
4278 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
4279 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
4280 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
4281 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
4282 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
4283 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
4284 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
4285 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
4286 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
4287 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
4288 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
4289 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
4290 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
4291 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
4292 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
4293 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
4294 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
4295 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
4296 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
4297 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
4298 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
4299 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
4300 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
4301 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
4302 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
4303 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
4304 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
4305 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
4306 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
4307 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
4308 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
4309 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
4310 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
4311 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
4312 #define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
4313 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 59)
4314 #define PTA_PKU (HOST_WIDE_INT_1 << 60)
4315
4316 #define PTA_CORE2 \
4317 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
4318 | PTA_CX16 | PTA_FXSR)
4319 #define PTA_NEHALEM \
4320 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
4321 #define PTA_WESTMERE \
4322 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
4323 #define PTA_SANDYBRIDGE \
4324 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
4325 #define PTA_IVYBRIDGE \
4326 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
4327 #define PTA_HASWELL \
4328 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
4329 | PTA_FMA | PTA_MOVBE | PTA_HLE)
4330 #define PTA_BROADWELL \
4331 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
4332 #define PTA_SKYLAKE \
4333 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
4334 #define PTA_SKYLAKE_AVX512 \
4335 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
4336 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
4337 #define PTA_KNL \
4338 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
4339 #define PTA_BONNELL \
4340 (PTA_CORE2 | PTA_MOVBE)
4341 #define PTA_SILVERMONT \
4342 (PTA_WESTMERE | PTA_MOVBE)
4343
4344 /* If this reaches 64, the pta flags field below needs to be widened. */
4345
4346 static struct pta
4347 {
4348 const char *const name; /* processor name or nickname. */
4349 const enum processor_type processor;
4350 const enum attr_cpu schedule;
4351 const unsigned HOST_WIDE_INT flags;
4352 }
4353 const processor_alias_table[] =
4354 {
4355 {"i386", PROCESSOR_I386, CPU_NONE, 0},
4356 {"i486", PROCESSOR_I486, CPU_NONE, 0},
4357 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4358 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4359 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
4360 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
4361 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
4362 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4363 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4364 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4365 PTA_MMX | PTA_SSE | PTA_FXSR},
4366 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4367 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4368 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
4369 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4370 PTA_MMX | PTA_SSE | PTA_FXSR},
4371 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4372 PTA_MMX | PTA_SSE | PTA_FXSR},
4373 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4374 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4375 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
4376 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
4377 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
4378 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4379 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
4380 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4381 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
4382 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4383 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
4384 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
4385 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4386 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4387 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
4388 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4389 PTA_SANDYBRIDGE},
4390 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4391 PTA_SANDYBRIDGE},
4392 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4393 PTA_IVYBRIDGE},
4394 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4395 PTA_IVYBRIDGE},
4396 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4397 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4398 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
4399 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
4400 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
4401 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4402 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4403 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4404 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4405 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
4406 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
4407 {"geode", PROCESSOR_GEODE, CPU_GEODE,
4408 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4409 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
4410 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4411 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4412 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
4413 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4414 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
4415 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4416 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
4417 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4418 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
4419 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4420 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
4421 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4422 {"x86-64", PROCESSOR_K8, CPU_K8,
4423 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
4424 {"k8", PROCESSOR_K8, CPU_K8,
4425 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4426 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4427 {"k8-sse3", PROCESSOR_K8, CPU_K8,
4428 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4429 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4430 {"opteron", PROCESSOR_K8, CPU_K8,
4431 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4432 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4433 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
4434 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4435 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4436 {"athlon64", PROCESSOR_K8, CPU_K8,
4437 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4438 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4439 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
4440 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4441 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4442 {"athlon-fx", PROCESSOR_K8, CPU_K8,
4443 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4444 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4445 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4446 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4447 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4448 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4449 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4450 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4451 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
4452 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4453 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4454 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4455 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4456 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
4457 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4458 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4459 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4460 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4461 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4462 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
4463 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4464 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4465 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4466 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4467 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
4468 | PTA_XSAVEOPT | PTA_FSGSBASE},
4469 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
4470 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4471 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4472 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4473 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
4474 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
4475 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
4476 | PTA_MOVBE | PTA_MWAITX},
4477 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
4478 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4479 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4480 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4481 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
4482 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
4483 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
4484 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
4485 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
4486 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
4487 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4488 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
4489 | PTA_FXSR | PTA_XSAVE},
4490 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
4491 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4492 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
4493 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
4494 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
4495 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
4496
4497 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
4498 PTA_64BIT
4499 | PTA_HLE /* flags are only used for -march switch. */ },
4500 };
4501
4502 /* -mrecip options. */
4503 static struct
4504 {
4505 const char *string; /* option name */
4506 unsigned int mask; /* mask bits to set */
4507 }
4508 const recip_options[] =
4509 {
4510 { "all", RECIP_MASK_ALL },
4511 { "none", RECIP_MASK_NONE },
4512 { "div", RECIP_MASK_DIV },
4513 { "sqrt", RECIP_MASK_SQRT },
4514 { "vec-div", RECIP_MASK_VEC_DIV },
4515 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
4516 };
4517
4518 int const pta_size = ARRAY_SIZE (processor_alias_table);
4519
4520 /* Set up prefix/suffix so the error messages refer to either the command
4521 line argument, or the attribute(target). */
4522 if (main_args_p)
4523 {
4524 prefix = "-m";
4525 suffix = "";
4526 sw = "switch";
4527 }
4528 else
4529 {
4530 prefix = "option(\"";
4531 suffix = "\")";
4532 sw = "attribute";
4533 }
4534
4535 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
4536 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
4537 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4538 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
4539 #ifdef TARGET_BI_ARCH
4540 else
4541 {
4542 #if TARGET_BI_ARCH == 1
4543 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
4544 is on and OPTION_MASK_ABI_X32 is off. We turn off
4545 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
4546 -mx32. */
4547 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4548 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4549 #else
4550 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
4551 on and OPTION_MASK_ABI_64 is off. We turn off
4552 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
4553 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
4554 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
4555 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
4556 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4557 #endif
4558 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4559 && TARGET_IAMCU_P (opts->x_target_flags))
4560 sorry ("Intel MCU psABI isn%'t supported in %s mode",
4561 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
4562 }
4563 #endif
4564
4565 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4566 {
4567 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4568 OPTION_MASK_ABI_64 for TARGET_X32. */
4569 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4570 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4571 }
4572 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4573 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
4574 | OPTION_MASK_ABI_X32
4575 | OPTION_MASK_ABI_64);
4576 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
4577 {
4578 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4579 OPTION_MASK_ABI_X32 for TARGET_LP64. */
4580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4581 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4582 }
4583
4584 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4585 SUBTARGET_OVERRIDE_OPTIONS;
4586 #endif
4587
4588 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4589 SUBSUBTARGET_OVERRIDE_OPTIONS;
4590 #endif
4591
4592 /* -fPIC is the default for x86_64. */
4593 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
4594 opts->x_flag_pic = 2;
4595
4596 /* Need to check -mtune=generic first. */
4597 if (opts->x_ix86_tune_string)
4598 {
4599 /* As special support for cross compilers we read -mtune=native
4600 as -mtune=generic. With native compilers we won't see the
4601 -mtune=native, as it was changed by the driver. */
4602 if (!strcmp (opts->x_ix86_tune_string, "native"))
4603 {
4604 opts->x_ix86_tune_string = "generic";
4605 }
4606 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4607 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
4608 "%stune=k8%s or %stune=generic%s instead as appropriate",
4609 prefix, suffix, prefix, suffix, prefix, suffix);
4610 }
4611 else
4612 {
4613 if (opts->x_ix86_arch_string)
4614 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
4615 if (!opts->x_ix86_tune_string)
4616 {
4617 opts->x_ix86_tune_string
4618 = processor_target_table[TARGET_CPU_DEFAULT].name;
4619 ix86_tune_defaulted = 1;
4620 }
4621
4622 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
4623 or defaulted. We need to use a sensible tune option. */
4624 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4625 {
4626 opts->x_ix86_tune_string = "generic";
4627 }
4628 }
4629
4630 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
4631 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4632 {
4633 /* rep; movq isn't available in 32-bit code. */
4634 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
4635 opts->x_ix86_stringop_alg = no_stringop;
4636 }
4637
4638 if (!opts->x_ix86_arch_string)
4639 opts->x_ix86_arch_string
4640 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
4641 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
4642 else
4643 ix86_arch_specified = 1;
4644
4645 if (opts_set->x_ix86_pmode)
4646 {
4647 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
4648 && opts->x_ix86_pmode == PMODE_SI)
4649 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4650 && opts->x_ix86_pmode == PMODE_DI))
4651 error ("address mode %qs not supported in the %s bit mode",
4652 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
4653 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
4654 }
4655 else
4656 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
4657 ? PMODE_DI : PMODE_SI;
4658
4659 if (!opts_set->x_ix86_abi)
4660 opts->x_ix86_abi = DEFAULT_ABI;
4661
4662 /* For targets using the MS ABI enable ms-extensions, if not
4663 explicitly turned off. For non-MS ABIs we turn this
4664 option off. */
4665 if (!opts_set->x_flag_ms_extensions)
4666 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
4667
4668 if (opts_set->x_ix86_cmodel)
4669 {
4670 switch (opts->x_ix86_cmodel)
4671 {
4672 case CM_SMALL:
4673 case CM_SMALL_PIC:
4674 if (opts->x_flag_pic)
4675 opts->x_ix86_cmodel = CM_SMALL_PIC;
4676 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4677 error ("code model %qs not supported in the %s bit mode",
4678 "small", "32");
4679 break;
4680
4681 case CM_MEDIUM:
4682 case CM_MEDIUM_PIC:
4683 if (opts->x_flag_pic)
4684 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
4685 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4686 error ("code model %qs not supported in the %s bit mode",
4687 "medium", "32");
4688 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4689 error ("code model %qs not supported in x32 mode",
4690 "medium");
4691 break;
4692
4693 case CM_LARGE:
4694 case CM_LARGE_PIC:
4695 if (opts->x_flag_pic)
4696 opts->x_ix86_cmodel = CM_LARGE_PIC;
4697 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4698 error ("code model %qs not supported in the %s bit mode",
4699 "large", "32");
4700 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4701 error ("code model %qs not supported in x32 mode",
4702 "large");
4703 break;
4704
4705 case CM_32:
4706 if (opts->x_flag_pic)
4707 error ("code model %s does not support PIC mode", "32");
4708 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4709 error ("code model %qs not supported in the %s bit mode",
4710 "32", "64");
4711 break;
4712
4713 case CM_KERNEL:
4714 if (opts->x_flag_pic)
4715 {
4716 error ("code model %s does not support PIC mode", "kernel");
4717 opts->x_ix86_cmodel = CM_32;
4718 }
4719 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4720 error ("code model %qs not supported in the %s bit mode",
4721 "kernel", "32");
4722 break;
4723
4724 default:
4725 gcc_unreachable ();
4726 }
4727 }
4728 else
4729 {
4730 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
4731 use of rip-relative addressing. This eliminates fixups that
4732 would otherwise be needed if this object is to be placed in a
4733 DLL, and is essentially just as efficient as direct addressing. */
4734 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4735 && (TARGET_RDOS || TARGET_PECOFF))
4736 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
4737 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4738 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
4739 else
4740 opts->x_ix86_cmodel = CM_32;
4741 }
4742 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
4743 {
4744 error ("-masm=intel not supported in this configuration");
4745 opts->x_ix86_asm_dialect = ASM_ATT;
4746 }
4747 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
4748 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
4749 sorry ("%i-bit mode not compiled in",
4750 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
4751
4752 for (i = 0; i < pta_size; i++)
4753 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
4754 {
4755 ix86_schedule = processor_alias_table[i].schedule;
4756 ix86_arch = processor_alias_table[i].processor;
4757 /* Default cpu tuning to the architecture. */
4758 ix86_tune = ix86_arch;
4759
4760 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4761 && !(processor_alias_table[i].flags & PTA_64BIT))
4762 error ("CPU you selected does not support x86-64 "
4763 "instruction set");
4764
4765 if (processor_alias_table[i].flags & PTA_MMX
4766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
4767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
4768 if (processor_alias_table[i].flags & PTA_3DNOW
4769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
4770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
4771 if (processor_alias_table[i].flags & PTA_3DNOW_A
4772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
4773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
4774 if (processor_alias_table[i].flags & PTA_SSE
4775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
4776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
4777 if (processor_alias_table[i].flags & PTA_SSE2
4778 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
4779 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
4780 if (processor_alias_table[i].flags & PTA_SSE3
4781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
4782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
4783 if (processor_alias_table[i].flags & PTA_SSSE3
4784 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
4785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
4786 if (processor_alias_table[i].flags & PTA_SSE4_1
4787 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
4788 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
4789 if (processor_alias_table[i].flags & PTA_SSE4_2
4790 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
4791 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
4792 if (processor_alias_table[i].flags & PTA_AVX
4793 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
4794 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
4795 if (processor_alias_table[i].flags & PTA_AVX2
4796 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
4797 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
4798 if (processor_alias_table[i].flags & PTA_FMA
4799 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
4800 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
4801 if (processor_alias_table[i].flags & PTA_SSE4A
4802 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
4803 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
4804 if (processor_alias_table[i].flags & PTA_FMA4
4805 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
4806 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4807 if (processor_alias_table[i].flags & PTA_XOP
4808 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4810 if (processor_alias_table[i].flags & PTA_LWP
4811 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4812 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4813 if (processor_alias_table[i].flags & PTA_ABM
4814 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4815 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4816 if (processor_alias_table[i].flags & PTA_BMI
4817 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4818 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4819 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
4820 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4822 if (processor_alias_table[i].flags & PTA_TBM
4823 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4824 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4825 if (processor_alias_table[i].flags & PTA_BMI2
4826 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4828 if (processor_alias_table[i].flags & PTA_CX16
4829 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
4830 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
4831 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
4832 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4833 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4834 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4835 && (processor_alias_table[i].flags & PTA_NO_SAHF))
4836 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4837 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4838 if (processor_alias_table[i].flags & PTA_MOVBE
4839 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
4840 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4841 if (processor_alias_table[i].flags & PTA_AES
4842 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4843 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
4844 if (processor_alias_table[i].flags & PTA_SHA
4845 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4846 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4847 if (processor_alias_table[i].flags & PTA_PCLMUL
4848 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4849 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4850 if (processor_alias_table[i].flags & PTA_FSGSBASE
4851 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4852 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4853 if (processor_alias_table[i].flags & PTA_RDRND
4854 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4855 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4856 if (processor_alias_table[i].flags & PTA_F16C
4857 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4858 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4859 if (processor_alias_table[i].flags & PTA_RTM
4860 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4861 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4862 if (processor_alias_table[i].flags & PTA_HLE
4863 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4864 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4865 if (processor_alias_table[i].flags & PTA_PRFCHW
4866 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4867 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4868 if (processor_alias_table[i].flags & PTA_RDSEED
4869 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4870 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4871 if (processor_alias_table[i].flags & PTA_ADX
4872 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4873 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4874 if (processor_alias_table[i].flags & PTA_FXSR
4875 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4876 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4877 if (processor_alias_table[i].flags & PTA_XSAVE
4878 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4879 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4880 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4881 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4882 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4883 if (processor_alias_table[i].flags & PTA_AVX512F
4884 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4885 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4886 if (processor_alias_table[i].flags & PTA_AVX512ER
4887 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4888 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4889 if (processor_alias_table[i].flags & PTA_AVX512PF
4890 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4891 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4892 if (processor_alias_table[i].flags & PTA_AVX512CD
4893 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4894 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4895 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4896 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4897 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4898 if (processor_alias_table[i].flags & PTA_PCOMMIT
4899 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
4900 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
4901 if (processor_alias_table[i].flags & PTA_CLWB
4902 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4903 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4904 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4905 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4906 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4907 if (processor_alias_table[i].flags & PTA_CLZERO
4908 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4909 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4910 if (processor_alias_table[i].flags & PTA_XSAVEC
4911 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4912 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4913 if (processor_alias_table[i].flags & PTA_XSAVES
4914 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4915 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4916 if (processor_alias_table[i].flags & PTA_AVX512DQ
4917 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4918 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4919 if (processor_alias_table[i].flags & PTA_AVX512BW
4920 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4921 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4922 if (processor_alias_table[i].flags & PTA_AVX512VL
4923 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4924 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4925 if (processor_alias_table[i].flags & PTA_MPX
4926 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
4927 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
4928 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4929 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4930 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4931 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4932 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4933 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4934 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4935 x86_prefetch_sse = true;
4936 if (processor_alias_table[i].flags & PTA_MWAITX
4937 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4938 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4939 if (processor_alias_table[i].flags & PTA_PKU
4940 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4941 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4942
4943 if (!(opts_set->x_target_flags & MASK_80387))
4944 {
4945 if (processor_alias_table[i].flags & PTA_NO_80387)
4946 opts->x_target_flags &= ~MASK_80387;
4947 else
4948 opts->x_target_flags |= MASK_80387;
4949 }
4950 break;
4951 }
4952
4953 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
4954 error ("Intel MPX does not support x32");
4955
4959 if (!strcmp (opts->x_ix86_arch_string, "generic"))
4960 error ("generic CPU can be used only for %stune=%s %s",
4961 prefix, suffix, sw);
4962 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
4963 error ("intel CPU can be used only for %stune=%s %s",
4964 prefix, suffix, sw);
4965 else if (i == pta_size)
4966 error ("bad value (%s) for %sarch=%s %s",
4967 opts->x_ix86_arch_string, prefix, suffix, sw);
4968
4969 ix86_arch_mask = 1u << ix86_arch;
4970 for (i = 0; i < X86_ARCH_LAST; ++i)
4971 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4972
4973 for (i = 0; i < pta_size; i++)
4974 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4975 {
4976 ix86_schedule = processor_alias_table[i].schedule;
4977 ix86_tune = processor_alias_table[i].processor;
4978 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4979 {
4980 if (!(processor_alias_table[i].flags & PTA_64BIT))
4981 {
4982 if (ix86_tune_defaulted)
4983 {
4984 opts->x_ix86_tune_string = "x86-64";
4985 for (i = 0; i < pta_size; i++)
4986 if (! strcmp (opts->x_ix86_tune_string,
4987 processor_alias_table[i].name))
4988 break;
4989 ix86_schedule = processor_alias_table[i].schedule;
4990 ix86_tune = processor_alias_table[i].processor;
4991 }
4992 else
4993 error ("CPU you selected does not support x86-64 "
4994 "instruction set");
4995 }
4996 }
4997 /* Intel CPUs have always interpreted SSE prefetch instructions as
4998 NOPs; so, we can enable SSE prefetch instructions even when
4999 -mtune (rather than -march) points us to a processor that has them.
5000 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
5001 higher processors. */
5002 if (TARGET_CMOV
5003 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
5004 x86_prefetch_sse = true;
5005 break;
5006 }
5007
5008 if (ix86_tune_specified && i == pta_size)
5009 error ("bad value (%s) for %stune=%s %s",
5010 opts->x_ix86_tune_string, prefix, suffix, sw);
5011
5012 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
5013
5014 #ifndef USE_IX86_FRAME_POINTER
5015 #define USE_IX86_FRAME_POINTER 0
5016 #endif
5017
5018 #ifndef USE_X86_64_FRAME_POINTER
5019 #define USE_X86_64_FRAME_POINTER 0
5020 #endif
5021
5022 /* Set the default values for switches whose default depends on TARGET_64BIT
5023 in case they weren't overwritten by command line options. */
5024 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5025 {
5026 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5027 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
5028 if (opts->x_flag_asynchronous_unwind_tables
5029 && !opts_set->x_flag_unwind_tables
5030 && TARGET_64BIT_MS_ABI)
5031 opts->x_flag_unwind_tables = 1;
5032 if (opts->x_flag_asynchronous_unwind_tables == 2)
5033 opts->x_flag_unwind_tables
5034 = opts->x_flag_asynchronous_unwind_tables = 1;
5035 if (opts->x_flag_pcc_struct_return == 2)
5036 opts->x_flag_pcc_struct_return = 0;
5037 }
5038 else
5039 {
5040 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5041 opts->x_flag_omit_frame_pointer
5042 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
5043 if (opts->x_flag_asynchronous_unwind_tables == 2)
5044 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
5045 if (opts->x_flag_pcc_struct_return == 2)
5046 {
5047 /* Intel MCU psABI specifies that -freg-struct-return should
5048 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
5049 we check -miamcu so that -freg-struct-return is always
5050 turned on if -miamcu is used. */
5051 if (TARGET_IAMCU_P (opts->x_target_flags))
5052 opts->x_flag_pcc_struct_return = 0;
5053 else
5054 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
5055 }
5056 }
5057
5058 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5059 /* TODO: ix86_cost should be chosen at instruction or function granularity,
5060 so for cold code we use size_cost even in !optimize_size compilation. */
5061 if (opts->x_optimize_size)
5062 ix86_cost = &ix86_size_cost;
5063 else
5064 ix86_cost = ix86_tune_cost;
5065
5066 /* Arrange to set up i386_stack_locals for all functions. */
5067 init_machine_status = ix86_init_machine_status;
5068
5069 /* Validate -mregparm= value. */
5070 if (opts_set->x_ix86_regparm)
5071 {
5072 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5073 warning (0, "-mregparm is ignored in 64-bit mode");
5074 else if (TARGET_IAMCU_P (opts->x_target_flags))
5075 warning (0, "-mregparm is ignored for Intel MCU psABI");
5076 if (opts->x_ix86_regparm > REGPARM_MAX)
5077 {
5078 error ("-mregparm=%d is not between 0 and %d",
5079 opts->x_ix86_regparm, REGPARM_MAX);
5080 opts->x_ix86_regparm = 0;
5081 }
5082 }
5083 if (TARGET_IAMCU_P (opts->x_target_flags)
5084 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
5085 opts->x_ix86_regparm = REGPARM_MAX;
5086
5087 /* Default align_* from the processor table. */
5088 ix86_default_align (opts);
5089
5090 /* Provide default for -mbranch-cost= value. */
5091 if (!opts_set->x_ix86_branch_cost)
5092 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
5093
5094 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5095 {
5096 opts->x_target_flags
5097 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
5098
5099 /* Enable by default the SSE and MMX builtins. Do allow the user to
5100 explicitly disable any of these. In particular, disabling SSE and
5101 MMX for kernel code is extremely useful. */
5102 if (!ix86_arch_specified)
5103 opts->x_ix86_isa_flags
5104 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
5105 | TARGET_SUBTARGET64_ISA_DEFAULT)
5106 & ~opts->x_ix86_isa_flags_explicit);
5107
5108 if (TARGET_RTD_P (opts->x_target_flags))
5109 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
5110 }
5111 else
5112 {
5113 opts->x_target_flags
5114 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
5115
5116 if (!ix86_arch_specified)
5117 opts->x_ix86_isa_flags
5118 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
5119
5120 /* The i386 ABI does not specify a red zone. It still makes sense to use one
5121 when the programmer takes care to keep the stack from being destroyed. */
5122 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
5123 opts->x_target_flags |= MASK_NO_RED_ZONE;
5124 }
5125
5126 /* Keep nonleaf frame pointers. */
5127 if (opts->x_flag_omit_frame_pointer)
5128 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
5129 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
5130 opts->x_flag_omit_frame_pointer = 1;
5131
5132 /* If we're doing fast math, we don't care about comparison order
5133 wrt NaNs. This lets us use a shorter comparison sequence. */
5134 if (opts->x_flag_finite_math_only)
5135 opts->x_target_flags &= ~MASK_IEEE_FP;
5136
5137 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
5138 since the insns won't need emulation. */
5139 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
5140 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
5141
5142 /* Likewise, if the target doesn't have a 387, or we've specified
5143 software floating point, don't use 387 inline intrinsics. */
5144 if (!TARGET_80387_P (opts->x_target_flags))
5145 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
5146
5147 /* Turn on MMX builtins for -msse. */
5148 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
5149 opts->x_ix86_isa_flags
5150 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
5151
5152 /* Enable SSE prefetch. */
5153 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
5154 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
5155 x86_prefetch_sse = true;
5156
5157 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
5158 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
5159 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
5160 opts->x_ix86_isa_flags
5161 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
5162
5163 /* Enable popcnt instruction for -msse4.2 or -mabm. */
5164 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
5165 || TARGET_ABM_P (opts->x_ix86_isa_flags))
5166 opts->x_ix86_isa_flags
5167 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
5168
5169 /* Enable lzcnt instruction for -mabm. */
5170 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
5171 opts->x_ix86_isa_flags
5172 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
5173
5174 /* Validate -mpreferred-stack-boundary= value or default it to
5175 PREFERRED_STACK_BOUNDARY_DEFAULT. */
5176 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
5177 if (opts_set->x_ix86_preferred_stack_boundary_arg)
5178 {
5179 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5180 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
5181 int max = (TARGET_SEH ? 4 : 12);
5182
5183 if (opts->x_ix86_preferred_stack_boundary_arg < min
5184 || opts->x_ix86_preferred_stack_boundary_arg > max)
5185 {
5186 if (min == max)
5187 error ("-mpreferred-stack-boundary is not supported "
5188 "for this target");
5189 else
5190 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
5191 opts->x_ix86_preferred_stack_boundary_arg, min, max);
5192 }
5193 else
5194 ix86_preferred_stack_boundary
5195 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
5196 }
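/* For illustration: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the customary
   16-byte stack alignment used by SSE code on x86-64.  */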
5197
5198 /* Set the default value for -mstackrealign. */
5199 if (opts->x_ix86_force_align_arg_pointer == -1)
5200 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
5201
5202 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
5203
5204 /* Validate -mincoming-stack-boundary= value or default it to
5205 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
5206 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
5207 if (opts_set->x_ix86_incoming_stack_boundary_arg)
5208 {
5209 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
5210
5211 if (opts->x_ix86_incoming_stack_boundary_arg < min
5212 || opts->x_ix86_incoming_stack_boundary_arg > 12)
5213 error ("-mincoming-stack-boundary=%d is not between %d and 12",
5214 opts->x_ix86_incoming_stack_boundary_arg, min);
5215 else
5216 {
5217 ix86_user_incoming_stack_boundary
5218 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
5219 ix86_incoming_stack_boundary
5220 = ix86_user_incoming_stack_boundary;
5221 }
5222 }
5223
5224 #ifndef NO_PROFILE_COUNTERS
5225 if (flag_nop_mcount)
5226 error ("-mnop-mcount is not compatible with this target");
5227 #endif
5228 if (flag_nop_mcount && flag_pic)
5229 error ("-mnop-mcount is not implemented for -fPIC");
5230
5231 /* Accept -msseregparm only if at least SSE support is enabled. */
5232 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
5233 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
5234 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
5235
5236 if (opts_set->x_ix86_fpmath)
5237 {
5238 if (opts->x_ix86_fpmath & FPMATH_SSE)
5239 {
5240 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
5241 {
5242 if (TARGET_80387_P (opts->x_target_flags))
5243 {
5244 warning (0, "SSE instruction set disabled, using 387 arithmetics");
5245 opts->x_ix86_fpmath = FPMATH_387;
5246 }
5247 }
5248 else if ((opts->x_ix86_fpmath & FPMATH_387)
5249 && !TARGET_80387_P (opts->x_target_flags))
5250 {
5251 warning (0, "387 instruction set disabled, using SSE arithmetics");
5252 opts->x_ix86_fpmath = FPMATH_SSE;
5253 }
5254 }
5255 }
5256 /* For all chips supporting SSE2, -mfpmath=sse performs better than
5257 -mfpmath=387. The latter is nevertheless the default on many targets,
5258 since the extra 80-bit precision of temporaries is considered part of
5259 the ABI. Override the default at least for -ffast-math.
5260 TODO: -mfpmath=both seems to produce code that performs the same with
5261 slightly smaller binaries. It is however not clear whether register
5262 allocation is ready for this setting.
5263 Also, -mfpmath=387 is overall noticeably more compact (about 4-5%) than
5264 SSE codegen. We may switch to 387 with -ffast-math for size-optimized
5265 functions. */
5266 else if (fast_math_flags_set_p (&global_options)
5267 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
5268 opts->x_ix86_fpmath = FPMATH_SSE;
5269 else
5270 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
5271
5272 /* Use external vectorized library in vectorizing intrinsics. */
5273 if (opts_set->x_ix86_veclibabi_type)
5274 switch (opts->x_ix86_veclibabi_type)
5275 {
5276 case ix86_veclibabi_type_svml:
5277 ix86_veclib_handler = ix86_veclibabi_svml;
5278 break;
5279
5280 case ix86_veclibabi_type_acml:
5281 ix86_veclib_handler = ix86_veclibabi_acml;
5282 break;
5283
5284 default:
5285 gcc_unreachable ();
5286 }
5287
5288 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
5289 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5290 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5291
5292 /* If stack probes are required, the space used for large function
5293 arguments on the stack must also be probed, so enable
5294 -maccumulate-outgoing-args so this happens in the prologue. */
5295 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
5296 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5297 {
5298 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5299 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
5300 "for correctness", prefix, suffix);
5301 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5302 }
5303
5304 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
5305 so enable -maccumulate-outgoing-args when %ebp is fixed. */
5306 if (fixed_regs[BP_REG]
5307 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5308 {
5309 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5310 warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
5311 prefix, suffix);
5312 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5313 }
5314
5315 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
5316 {
5317 char *p;
5318 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
5319 p = strchr (internal_label_prefix, 'X');
5320 internal_label_prefix_len = p - internal_label_prefix;
5321 *p = '\0';
5322 }
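/* For illustration only: on a typical ELF configuration, where
   ASM_GENERATE_INTERNAL_LABEL formats the label roughly as
   "*" LOCAL_LABEL_PREFIX "LX" "0", the buffer holds something like
   "*.LX0"; the 'X' is then overwritten with a NUL, leaving
   internal_label_prefix == "*.L" and internal_label_prefix_len == 3.
   The exact string is target-dependent.  */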
5323
5324 /* When scheduling description is not available, disable scheduler pass
5325 so it won't slow down the compilation and make x87 code slower. */
5326 if (!TARGET_SCHEDULE)
5327 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
5328
5329 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5330 ix86_tune_cost->simultaneous_prefetches,
5331 opts->x_param_values,
5332 opts_set->x_param_values);
5333 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5334 ix86_tune_cost->prefetch_block,
5335 opts->x_param_values,
5336 opts_set->x_param_values);
5337 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
5338 ix86_tune_cost->l1_cache_size,
5339 opts->x_param_values,
5340 opts_set->x_param_values);
5341 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
5342 ix86_tune_cost->l2_cache_size,
5343 opts->x_param_values,
5344 opts_set->x_param_values);
5345
5346 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
5347 if (opts->x_flag_prefetch_loop_arrays < 0
5348 && HAVE_prefetch
5349 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
5350 && !opts->x_optimize_size
5351 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
5352 opts->x_flag_prefetch_loop_arrays = 1;
5353
5354 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
5355 can be optimized to ap = __builtin_next_arg (0). */
5356 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
5357 targetm.expand_builtin_va_start = NULL;
5358
5359 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5360 {
5361 ix86_gen_leave = gen_leave_rex64;
5362 if (Pmode == DImode)
5363 {
5364 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
5365 ix86_gen_tls_local_dynamic_base_64
5366 = gen_tls_local_dynamic_base_64_di;
5367 }
5368 else
5369 {
5370 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
5371 ix86_gen_tls_local_dynamic_base_64
5372 = gen_tls_local_dynamic_base_64_si;
5373 }
5374 }
5375 else
5376 ix86_gen_leave = gen_leave;
5377
5378 if (Pmode == DImode)
5379 {
5380 ix86_gen_add3 = gen_adddi3;
5381 ix86_gen_sub3 = gen_subdi3;
5382 ix86_gen_sub3_carry = gen_subdi3_carry;
5383 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
5384 ix86_gen_andsp = gen_anddi3;
5385 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
5386 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
5387 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
5388 ix86_gen_monitor = gen_sse3_monitor_di;
5389 ix86_gen_monitorx = gen_monitorx_di;
5390 ix86_gen_clzero = gen_clzero_di;
5391 }
5392 else
5393 {
5394 ix86_gen_add3 = gen_addsi3;
5395 ix86_gen_sub3 = gen_subsi3;
5396 ix86_gen_sub3_carry = gen_subsi3_carry;
5397 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
5398 ix86_gen_andsp = gen_andsi3;
5399 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
5400 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
5401 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
5402 ix86_gen_monitor = gen_sse3_monitor_si;
5403 ix86_gen_monitorx = gen_monitorx_si;
5404 ix86_gen_clzero = gen_clzero_si;
5405 }
5406
5407 #ifdef USE_IX86_CLD
5408 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
5409 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5410 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
5411 #endif
5412
5413 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
5414 {
5415 if (opts->x_flag_fentry > 0)
5416 sorry ("-mfentry isn%'t supported for 32-bit in combination "
5417 "with -fpic");
5418 opts->x_flag_fentry = 0;
5419 }
5420 else if (TARGET_SEH)
5421 {
5422 if (opts->x_flag_fentry == 0)
5423 sorry ("-mno-fentry isn%'t compatible with SEH");
5424 opts->x_flag_fentry = 1;
5425 }
5426 else if (opts->x_flag_fentry < 0)
5427 {
5428 #if defined(PROFILE_BEFORE_PROLOGUE)
5429 opts->x_flag_fentry = 1;
5430 #else
5431 opts->x_flag_fentry = 0;
5432 #endif
5433 }
5434
5435 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
5436 opts->x_target_flags |= MASK_VZEROUPPER;
5437 if (!(opts_set->x_target_flags & MASK_STV))
5438 opts->x_target_flags |= MASK_STV;
5439 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
5440 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
5441 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
5442 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
5443 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
5444 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
5445 /* Enable 128-bit AVX instruction generation
5446 for the auto-vectorizer. */
5447 if (TARGET_AVX128_OPTIMAL
5448 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
5449 opts->x_target_flags |= MASK_PREFER_AVX128;
5450
5451 if (opts->x_ix86_recip_name)
5452 {
5453 char *p = ASTRDUP (opts->x_ix86_recip_name);
5454 char *q;
5455 unsigned int mask, i;
5456 bool invert;
5457
5458 while ((q = strtok (p, ",")) != NULL)
5459 {
5460 p = NULL;
5461 if (*q == '!')
5462 {
5463 invert = true;
5464 q++;
5465 }
5466 else
5467 invert = false;
5468
5469 if (!strcmp (q, "default"))
5470 mask = RECIP_MASK_ALL;
5471 else
5472 {
5473 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5474 if (!strcmp (q, recip_options[i].string))
5475 {
5476 mask = recip_options[i].mask;
5477 break;
5478 }
5479
5480 if (i == ARRAY_SIZE (recip_options))
5481 {
5482 error ("unknown option for -mrecip=%s", q);
5483 invert = false;
5484 mask = RECIP_MASK_NONE;
5485 }
5486 }
5487
5488 opts->x_recip_mask_explicit |= mask;
5489 if (invert)
5490 opts->x_recip_mask &= ~mask;
5491 else
5492 opts->x_recip_mask |= mask;
5493 }
5494 }
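/* For illustration (assuming "sqrt" is one of the recip_options entries):
   -mrecip=default,!sqrt first sets every bit in RECIP_MASK_ALL and then
   clears the sqrt bit again, since a leading '!' inverts the named
   option.  */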
5495
5496 if (TARGET_RECIP_P (opts->x_target_flags))
5497 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
5498 else if (opts_set->x_target_flags & MASK_RECIP)
5499 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
5500
5501 /* Default long double to 64-bit for 32-bit Bionic and to __float128
5502 for 64-bit Bionic. Also default long double to 64-bit for Intel
5503 MCU psABI. */
5504 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
5505 && !(opts_set->x_target_flags
5506 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
5507 opts->x_target_flags |= (TARGET_64BIT
5508 ? MASK_LONG_DOUBLE_128
5509 : MASK_LONG_DOUBLE_64);
5510
5511 /* Only one of them can be active. */
5512 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
5513 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
5514
5515 /* Save the initial options in case the user uses function-specific
5516 options. */
5517 if (main_args_p)
5518 target_option_default_node = target_option_current_node
5519 = build_target_option_node (opts);
5520
5521 /* Handle stack protector */
5522 if (!opts_set->x_ix86_stack_protector_guard)
5523 opts->x_ix86_stack_protector_guard
5524 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
5525
5526 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
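/* For illustration only: the strategy strings are expected to be
   comma-separated alg:max_size:dest_align triplets, e.g. something like
   "unrolled_loop:256:noalign,libcall:-1:noalign"; the exact grammar is
   whatever ix86_parse_stringop_strategy_string accepts.  */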
5527 if (opts->x_ix86_tune_memcpy_strategy)
5528 {
5529 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
5530 ix86_parse_stringop_strategy_string (str, false);
5531 free (str);
5532 }
5533
5534 if (opts->x_ix86_tune_memset_strategy)
5535 {
5536 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
5537 ix86_parse_stringop_strategy_string (str, true);
5538 free (str);
5539 }
5540 }
5541
5542 /* Implement the TARGET_OPTION_OVERRIDE hook. */
5543
5544 static void
5545 ix86_option_override (void)
5546 {
5547 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
5548 struct register_pass_info insert_vzeroupper_info
5549 = { pass_insert_vzeroupper, "reload",
5550 1, PASS_POS_INSERT_AFTER
5551 };
5552 opt_pass *pass_stv = make_pass_stv (g);
5553 struct register_pass_info stv_info
5554 = { pass_stv, "combine",
5555 1, PASS_POS_INSERT_AFTER
5556 };
5557
5558 ix86_option_override_internal (true, &global_options, &global_options_set);
5559
5560
5561 /* This needs to be done at start up. It's convenient to do it here. */
5562 register_pass (&insert_vzeroupper_info);
5563 register_pass (&stv_info);
5564 }
5565
5566 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
5567 static char *
5568 ix86_offload_options (void)
5569 {
5570 if (TARGET_LP64)
5571 return xstrdup ("-foffload-abi=lp64");
5572 return xstrdup ("-foffload-abi=ilp32");
5573 }
5574
5575 /* Update register usage after having seen the compiler flags. */
5576
5577 static void
5578 ix86_conditional_register_usage (void)
5579 {
5580 int i, c_mask;
5581
5582 /* For 32-bit targets, squash the REX registers. */
5583 if (! TARGET_64BIT)
5584 {
5585 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
5586 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5587 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
5588 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5589 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5590 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5591 }
5592
5593 /* See the definition of CALL_USED_REGISTERS in i386.h. */
5594 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
5595
5596 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
5597
5598 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5599 {
5600 /* Set/reset conditionally defined registers from
5601 CALL_USED_REGISTERS initializer. */
5602 if (call_used_regs[i] > 1)
5603 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5604
5605 /* Calculate registers of CLOBBERED_REGS register set
5606 as call used registers from GENERAL_REGS register set. */
5607 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5608 && call_used_regs[i])
5609 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5610 }
5611
5612 /* If MMX is disabled, squash the registers. */
5613 if (! TARGET_MMX)
5614 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5615 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5616 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5617
5618 /* If SSE is disabled, squash the registers. */
5619 if (! TARGET_SSE)
5620 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5621 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5622 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5623
5624 /* If the FPU is disabled, squash the registers. */
5625 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5626 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5627 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5628 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5629
5630 /* If AVX512F is disabled, squash the registers. */
5631 if (! TARGET_AVX512F)
5632 {
5633 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5634 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5635
5636 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5637 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5638 }
5639
5640 /* If MPX is disabled, squash the registers. */
5641 if (! TARGET_MPX)
5642 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5643 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5644 }
5645
5646 \f
5647 /* Save the current options */
5648
5649 static void
5650 ix86_function_specific_save (struct cl_target_option *ptr,
5651 struct gcc_options *opts)
5652 {
5653 ptr->arch = ix86_arch;
5654 ptr->schedule = ix86_schedule;
5655 ptr->prefetch_sse = x86_prefetch_sse;
5656 ptr->tune = ix86_tune;
5657 ptr->branch_cost = ix86_branch_cost;
5658 ptr->tune_defaulted = ix86_tune_defaulted;
5659 ptr->arch_specified = ix86_arch_specified;
5660 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5661 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
5662 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5663 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5664 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5665 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5666 ptr->x_ix86_abi = opts->x_ix86_abi;
5667 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5668 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5669 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5670 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5671 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5672 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5673 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5674 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5675 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5676 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5677 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5678 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5679 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5680 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5681 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5682 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5683 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5684 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5685 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5686 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5687
5688 /* The fields are char but the variables are not; make sure the
5689 values fit in the fields. */
5690 gcc_assert (ptr->arch == ix86_arch);
5691 gcc_assert (ptr->schedule == ix86_schedule);
5692 gcc_assert (ptr->tune == ix86_tune);
5693 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5694 }
5695
5696 /* Restore the current options */
5697
5698 static void
5699 ix86_function_specific_restore (struct gcc_options *opts,
5700 struct cl_target_option *ptr)
5701 {
5702 enum processor_type old_tune = ix86_tune;
5703 enum processor_type old_arch = ix86_arch;
5704 unsigned int ix86_arch_mask;
5705 int i;
5706
5707 /* We don't change -fPIC. */
5708 opts->x_flag_pic = flag_pic;
5709
5710 ix86_arch = (enum processor_type) ptr->arch;
5711 ix86_schedule = (enum attr_cpu) ptr->schedule;
5712 ix86_tune = (enum processor_type) ptr->tune;
5713 x86_prefetch_sse = ptr->prefetch_sse;
5714 opts->x_ix86_branch_cost = ptr->branch_cost;
5715 ix86_tune_defaulted = ptr->tune_defaulted;
5716 ix86_arch_specified = ptr->arch_specified;
5717 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5718 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
5719 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5720 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5721 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5722 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5723 opts->x_ix86_abi = ptr->x_ix86_abi;
5724 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5725 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5726 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5727 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5728 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5729 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5730 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5731 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5732 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5733 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5734 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5735 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5736 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5737 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5738 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5739 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5740 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5741 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5742 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5743 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5744 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5745 /* TODO: ix86_cost should be chosen at instruction or function granularity,
5746 so for cold code we use size_cost even in !optimize_size compilation. */
5747 if (opts->x_optimize_size)
5748 ix86_cost = &ix86_size_cost;
5749 else
5750 ix86_cost = ix86_tune_cost;
5751
5752 /* Recreate the arch feature tests if the arch changed */
5753 if (old_arch != ix86_arch)
5754 {
5755 ix86_arch_mask = 1u << ix86_arch;
5756 for (i = 0; i < X86_ARCH_LAST; ++i)
5757 ix86_arch_features[i]
5758 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5759 }
5760
5761 /* Recreate the tune optimization tests */
5762 if (old_tune != ix86_tune)
5763 set_ix86_tune_features (ix86_tune, false);
5764 }
5765
5766 /* Adjust target options after streaming them in. This is mainly about
5767 reconciling them with global options. */
5768
5769 static void
5770 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5771 {
5772 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
5773 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5774 for PIC, or error out. */
5775 if (flag_pic)
5776 switch (ptr->x_ix86_cmodel)
5777 {
5778 case CM_SMALL:
5779 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5780 break;
5781
5782 case CM_MEDIUM:
5783 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5784 break;
5785
5786 case CM_LARGE:
5787 ptr->x_ix86_cmodel = CM_LARGE_PIC;
5788 break;
5789
5790 case CM_KERNEL:
5791 error ("code model %s does not support PIC mode", "kernel");
5792 break;
5793
5794 default:
5795 break;
5796 }
5797 else
5798 switch (ptr->x_ix86_cmodel)
5799 {
5800 case CM_SMALL_PIC:
5801 ptr->x_ix86_cmodel = CM_SMALL;
5802 break;
5803
5804 case CM_MEDIUM_PIC:
5805 ptr->x_ix86_cmodel = CM_MEDIUM;
5806 break;
5807
5808 case CM_LARGE_PIC:
5809 ptr->x_ix86_cmodel = CM_LARGE;
5810 break;
5811
5812 default:
5813 break;
5814 }
5815 }
5816
5817 /* Print the current options */
5818
5819 static void
5820 ix86_function_specific_print (FILE *file, int indent,
5821 struct cl_target_option *ptr)
5822 {
5823 char *target_string
5824 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
5825 NULL, NULL, ptr->x_ix86_fpmath, false);
5826
5827 gcc_assert (ptr->arch < PROCESSOR_max);
5828 fprintf (file, "%*sarch = %d (%s)\n",
5829 indent, "",
5830 ptr->arch, processor_target_table[ptr->arch].name);
5831
5832 gcc_assert (ptr->tune < PROCESSOR_max);
5833 fprintf (file, "%*stune = %d (%s)\n",
5834 indent, "",
5835 ptr->tune, processor_target_table[ptr->tune].name);
5836
5837 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5838
5839 if (target_string)
5840 {
5841 fprintf (file, "%*s%s\n", indent, "", target_string);
5842 free (target_string);
5843 }
5844 }
5845
5846 \f
5847 /* Inner function to process the attribute ((target (...))): take an argument
5848 and set the current options from that argument. If we have a list,
5849 recursively go over the list. */
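/* For example, attribute((target("avx2,no-fma"))) is handled here as two
   comma-separated entries: "avx2" enables the AVX2 ISA, while the "no-"
   prefix on "fma" disables FMA (see the no-xxx handling below).  */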
5850
5851 static bool
5852 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5853 struct gcc_options *opts,
5854 struct gcc_options *opts_set,
5855 struct gcc_options *enum_opts_set)
5856 {
5857 char *next_optstr;
5858 bool ret = true;
5859
5860 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5861 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5862 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5863 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5864 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5865
5866 enum ix86_opt_type
5867 {
5868 ix86_opt_unknown,
5869 ix86_opt_yes,
5870 ix86_opt_no,
5871 ix86_opt_str,
5872 ix86_opt_enum,
5873 ix86_opt_isa
5874 };
5875
5876 static const struct
5877 {
5878 const char *string;
5879 size_t len;
5880 enum ix86_opt_type type;
5881 int opt;
5882 int mask;
5883 } attrs[] = {
5884 /* isa options */
5885 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5886 IX86_ATTR_ISA ("abm", OPT_mabm),
5887 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5888 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5889 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5890 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5891 IX86_ATTR_ISA ("aes", OPT_maes),
5892 IX86_ATTR_ISA ("sha", OPT_msha),
5893 IX86_ATTR_ISA ("avx", OPT_mavx),
5894 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5895 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5896 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5897 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5898 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5899 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5900 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5901 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5902 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5903 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5904 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5905 IX86_ATTR_ISA ("sse", OPT_msse),
5906 IX86_ATTR_ISA ("sse2", OPT_msse2),
5907 IX86_ATTR_ISA ("sse3", OPT_msse3),
5908 IX86_ATTR_ISA ("sse4", OPT_msse4),
5909 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5910 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5911 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5912 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5913 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5914 IX86_ATTR_ISA ("fma", OPT_mfma),
5915 IX86_ATTR_ISA ("xop", OPT_mxop),
5916 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5917 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5918 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5919 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5920 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5921 IX86_ATTR_ISA ("hle", OPT_mhle),
5922 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5923 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5924 IX86_ATTR_ISA ("adx", OPT_madx),
5925 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5926 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5927 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5928 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5929 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5930 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5931 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5932 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5933 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5934 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5935 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
5936 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5937 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5938 IX86_ATTR_ISA ("pku", OPT_mpku),
5939
5940 /* enum options */
5941 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5942
5943 /* string options */
5944 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5945 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5946
5947 /* flag options */
5948 IX86_ATTR_YES ("cld",
5949 OPT_mcld,
5950 MASK_CLD),
5951
5952 IX86_ATTR_NO ("fancy-math-387",
5953 OPT_mfancy_math_387,
5954 MASK_NO_FANCY_MATH_387),
5955
5956 IX86_ATTR_YES ("ieee-fp",
5957 OPT_mieee_fp,
5958 MASK_IEEE_FP),
5959
5960 IX86_ATTR_YES ("inline-all-stringops",
5961 OPT_minline_all_stringops,
5962 MASK_INLINE_ALL_STRINGOPS),
5963
5964 IX86_ATTR_YES ("inline-stringops-dynamically",
5965 OPT_minline_stringops_dynamically,
5966 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5967
5968 IX86_ATTR_NO ("align-stringops",
5969 OPT_mno_align_stringops,
5970 MASK_NO_ALIGN_STRINGOPS),
5971
5972 IX86_ATTR_YES ("recip",
5973 OPT_mrecip,
5974 MASK_RECIP),
5975
5976 };
5977
5978 /* If this is a list, recurse to get the options. */
5979 if (TREE_CODE (args) == TREE_LIST)
5980 {
5981 bool ret = true;
5982
5983 for (; args; args = TREE_CHAIN (args))
5984 if (TREE_VALUE (args)
5985 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5986 p_strings, opts, opts_set,
5987 enum_opts_set))
5988 ret = false;
5989
5990 return ret;
5991 }
5992
5993 else if (TREE_CODE (args) != STRING_CST)
5994 {
5995 error ("attribute %<target%> argument not a string");
5996 return false;
5997 }
5998
5999 /* Handle multiple arguments separated by commas. */
6000 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
6001
6002 while (next_optstr && *next_optstr != '\0')
6003 {
6004 char *p = next_optstr;
6005 char *orig_p = p;
6006 char *comma = strchr (next_optstr, ',');
6007 const char *opt_string;
6008 size_t len, opt_len;
6009 int opt;
6010 bool opt_set_p;
6011 char ch;
6012 unsigned i;
6013 enum ix86_opt_type type = ix86_opt_unknown;
6014 int mask = 0;
6015
6016 if (comma)
6017 {
6018 *comma = '\0';
6019 len = comma - next_optstr;
6020 next_optstr = comma + 1;
6021 }
6022 else
6023 {
6024 len = strlen (p);
6025 next_optstr = NULL;
6026 }
6027
6028 /* Recognize no-xxx. */
6029 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
6030 {
6031 opt_set_p = false;
6032 p += 3;
6033 len -= 3;
6034 }
6035 else
6036 opt_set_p = true;
6037
6038 /* Find the option. */
6039 ch = *p;
6040 opt = N_OPTS;
6041 for (i = 0; i < ARRAY_SIZE (attrs); i++)
6042 {
6043 type = attrs[i].type;
6044 opt_len = attrs[i].len;
6045 if (ch == attrs[i].string[0]
6046 && ((type != ix86_opt_str && type != ix86_opt_enum)
6047 ? len == opt_len
6048 : len > opt_len)
6049 && memcmp (p, attrs[i].string, opt_len) == 0)
6050 {
6051 opt = attrs[i].opt;
6052 mask = attrs[i].mask;
6053 opt_string = attrs[i].string;
6054 break;
6055 }
6056 }
6057
6058 /* Process the option. */
6059 if (opt == N_OPTS)
6060 {
6061 error ("attribute(target(\"%s\")) is unknown", orig_p);
6062 ret = false;
6063 }
6064
6065 else if (type == ix86_opt_isa)
6066 {
6067 struct cl_decoded_option decoded;
6068
6069 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
6070 ix86_handle_option (opts, opts_set,
6071 &decoded, input_location);
6072 }
6073
6074 else if (type == ix86_opt_yes || type == ix86_opt_no)
6075 {
6076 if (type == ix86_opt_no)
6077 opt_set_p = !opt_set_p;
6078
6079 if (opt_set_p)
6080 opts->x_target_flags |= mask;
6081 else
6082 opts->x_target_flags &= ~mask;
6083 }
6084
6085 else if (type == ix86_opt_str)
6086 {
6087 if (p_strings[opt])
6088 {
6089 error ("option(\"%s\") was already specified", opt_string);
6090 ret = false;
6091 }
6092 else
6093 p_strings[opt] = xstrdup (p + opt_len);
6094 }
6095
6096 else if (type == ix86_opt_enum)
6097 {
6098 bool arg_ok;
6099 int value;
6100
6101 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
6102 if (arg_ok)
6103 set_option (opts, enum_opts_set, opt, value,
6104 p + opt_len, DK_UNSPECIFIED, input_location,
6105 global_dc);
6106 else
6107 {
6108 error ("attribute(target(\"%s\")) is unknown", orig_p);
6109 ret = false;
6110 }
6111 }
6112
6113 else
6114 gcc_unreachable ();
6115 }
6116
6117 return ret;
6118 }
6119
6120 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
6121
6122 tree
6123 ix86_valid_target_attribute_tree (tree args,
6124 struct gcc_options *opts,
6125 struct gcc_options *opts_set)
6126 {
6127 const char *orig_arch_string = opts->x_ix86_arch_string;
6128 const char *orig_tune_string = opts->x_ix86_tune_string;
6129 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
6130 int orig_tune_defaulted = ix86_tune_defaulted;
6131 int orig_arch_specified = ix86_arch_specified;
6132 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
6133 tree t = NULL_TREE;
6134 int i;
6135 struct cl_target_option *def
6136 = TREE_TARGET_OPTION (target_option_default_node);
6137 struct gcc_options enum_opts_set;
6138
6139 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
6140
6141 /* Process each of the options on the chain. */
6142 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
6143 opts_set, &enum_opts_set))
6144 return error_mark_node;
6145
6146 /* If the changed options are different from the default, rerun
6147 ix86_option_override_internal, and then save the options away.
6148 The string options are attribute options, and will be undone
6149 when we copy the save structure. */
6150 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
6151 || opts->x_target_flags != def->x_target_flags
6152 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
6153 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
6154 || enum_opts_set.x_ix86_fpmath)
6155 {
6156 /* If we are using the default tune= or arch=, undo the string assigned,
6157 and use the default. */
6158 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6159 {
6160 opts->x_ix86_arch_string
6161 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
6162
6163 /* If arch= is set, clear all bits in x_ix86_isa_flags,
6164 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
6165 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
6166 | OPTION_MASK_ABI_64
6167 | OPTION_MASK_ABI_X32
6168 | OPTION_MASK_CODE16);
6169
6170 }
6171 else if (!orig_arch_specified)
6172 opts->x_ix86_arch_string = NULL;
6173
6174 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
6175 opts->x_ix86_tune_string
6176 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
6177 else if (orig_tune_defaulted)
6178 opts->x_ix86_tune_string = NULL;
6179
6180 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
6181 if (enum_opts_set.x_ix86_fpmath)
6182 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6183 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
6184 && TARGET_SSE_P (opts->x_ix86_isa_flags))
6185 {
6186 if (TARGET_80387_P (opts->x_target_flags))
6187 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE
6188 | FPMATH_387);
6189 else
6190 opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE;
6191 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6192 }
6193
6194 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
6195 ix86_option_override_internal (false, opts, opts_set);
6196
6197 /* Add any builtin functions with the new isa if any. */
6198 ix86_add_new_builtins (opts->x_ix86_isa_flags);
6199
6200 /* Save the current options unless we are validating options for
6201 #pragma. */
6202 t = build_target_option_node (opts);
6203
6204 opts->x_ix86_arch_string = orig_arch_string;
6205 opts->x_ix86_tune_string = orig_tune_string;
6206 opts_set->x_ix86_fpmath = orig_fpmath_set;
6207
6208       /* Free up memory allocated to hold the strings.  */
6209 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
6210 free (option_strings[i]);
6211 }
6212
6213 return t;
6214 }
6215
6216 /* Hook to validate attribute((target("string"))). */
6217
6218 static bool
6219 ix86_valid_target_attribute_p (tree fndecl,
6220 tree ARG_UNUSED (name),
6221 tree args,
6222 int ARG_UNUSED (flags))
6223 {
6224 struct gcc_options func_options;
6225 tree new_target, new_optimize;
6226 bool ret = true;
6227
6228 /* attribute((target("default"))) does nothing, beyond
6229 affecting multi-versioning. */
6230 if (TREE_VALUE (args)
6231 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
6232 && TREE_CHAIN (args) == NULL_TREE
6233 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
6234 return true;
6235
6236 tree old_optimize = build_optimization_node (&global_options);
6237
6238 /* Get the optimization options of the current function. */
6239 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
6240
6241 if (!func_optimize)
6242 func_optimize = old_optimize;
6243
6244 /* Init func_options. */
6245 memset (&func_options, 0, sizeof (func_options));
6246 init_options_struct (&func_options, NULL);
6247 lang_hooks.init_options_struct (&func_options);
6248
6249 cl_optimization_restore (&func_options,
6250 TREE_OPTIMIZATION (func_optimize));
6251
6252 /* Initialize func_options to the default before its target options can
6253 be set. */
6254 cl_target_option_restore (&func_options,
6255 TREE_TARGET_OPTION (target_option_default_node));
6256
6257 new_target = ix86_valid_target_attribute_tree (args, &func_options,
6258 &global_options_set);
6259
6260 new_optimize = build_optimization_node (&func_options);
6261
6262 if (new_target == error_mark_node)
6263 ret = false;
6264
6265 else if (fndecl && new_target)
6266 {
6267 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
6268
6269 if (old_optimize != new_optimize)
6270 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
6271 }
6272
6273 finalize_options_struct (&func_options);
6274
6275 return ret;
6276 }
6277
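/* Illustrative only, not part of GCC: the kind of user code the validation
   hooks above are exercised on.  The attribute string is parsed by
   ix86_valid_target_attribute_inner_p and turned into a TARGET_OPTION_NODE
   by ix86_valid_target_attribute_tree:

     __attribute__ ((target ("sse4.2,tune=generic")))
     static int
     popcount32 (unsigned int x)
     {
       return __builtin_popcount (x);	/* may expand to POPCNT here */
     }

   An unrecognized string such as target("not-an-isa") reaches the error
   path above and the attribute is rejected.  */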
6278 \f
6279 /* Hook to determine if one function can safely inline another. */
6280
6281 static bool
6282 ix86_can_inline_p (tree caller, tree callee)
6283 {
6284 bool ret = false;
6285 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
6286 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
6287
6288 /* If callee has no option attributes, then it is ok to inline. */
6289 if (!callee_tree)
6290 ret = true;
6291
6292 /* If caller has no option attributes, but callee does then it is not ok to
6293 inline. */
6294 else if (!caller_tree)
6295 ret = false;
6296
6297 else
6298 {
6299 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
6300 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
6301
6302 	  /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
6303 	     function can inline an SSE2 function, but an SSE2 function can't
6304 	     inline an SSE4 function.  */
6305 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
6306 != callee_opts->x_ix86_isa_flags)
6307 ret = false;
6308
6309 /* See if we have the same non-isa options. */
6310 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
6311 ret = false;
6312
6313 /* See if arch, tune, etc. are the same. */
6314 else if (caller_opts->arch != callee_opts->arch)
6315 ret = false;
6316
6317 else if (caller_opts->tune != callee_opts->tune)
6318 ret = false;
6319
6320 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
6321 ret = false;
6322
6323 else if (caller_opts->branch_cost != callee_opts->branch_cost)
6324 ret = false;
6325
6326 else
6327 ret = true;
6328 }
6329
6330 return ret;
6331 }
6332
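/* Illustrative only, not part of GCC: how the inlining rule above plays out
   at the source level.  The SSE2 callee's ISA flags are a subset of the
   SSE4.1 caller's, so the first call may be inlined; the reverse is not:

     __attribute__ ((target ("sse2")))   static int f2 (int x) { return x + 1; }
     __attribute__ ((target ("sse4.1"))) static int f4 (int x) { return f2 (x); }  /* OK          */
     __attribute__ ((target ("sse2")))   static int g  (int x) { return f4 (x); }  /* not inlined */
   */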
6333 \f
6334 /* Remember the last target of ix86_set_current_function. */
6335 static GTY(()) tree ix86_previous_fndecl;
6336
6337 /* Set targets globals to the default (or current #pragma GCC target
6338 if active). Invalidate ix86_previous_fndecl cache. */
6339
6340 void
6341 ix86_reset_previous_fndecl (void)
6342 {
6343 tree new_tree = target_option_current_node;
6344 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6345 if (TREE_TARGET_GLOBALS (new_tree))
6346 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6347 else if (new_tree == target_option_default_node)
6348 restore_target_globals (&default_target_globals);
6349 else
6350 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6351 ix86_previous_fndecl = NULL_TREE;
6352 }
6353
6354 /* Establish appropriate back-end context for processing the function
6355 FNDECL. The argument might be NULL to indicate processing at top
6356 level, outside of any function scope. */
6357 static void
6358 ix86_set_current_function (tree fndecl)
6359 {
6360 /* Only change the context if the function changes. This hook is called
6361 several times in the course of compiling a function, and we don't want to
6362 slow things down too much or call target_reinit when it isn't safe. */
6363 if (fndecl == ix86_previous_fndecl)
6364 return;
6365
6366 tree old_tree;
6367 if (ix86_previous_fndecl == NULL_TREE)
6368 old_tree = target_option_current_node;
6369 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
6370 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
6371 else
6372 old_tree = target_option_default_node;
6373
6374 if (fndecl == NULL_TREE)
6375 {
6376 if (old_tree != target_option_current_node)
6377 ix86_reset_previous_fndecl ();
6378 return;
6379 }
6380
6381 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6382 if (new_tree == NULL_TREE)
6383 new_tree = target_option_default_node;
6384
6385 if (old_tree != new_tree)
6386 {
6387 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6388 if (TREE_TARGET_GLOBALS (new_tree))
6389 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6390 else if (new_tree == target_option_default_node)
6391 restore_target_globals (&default_target_globals);
6392 else
6393 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6394 }
6395 ix86_previous_fndecl = fndecl;
6396
6397   /* 64-bit MS and SYSV ABIs have different sets of call-used registers.
6398 Avoid expensive re-initialization of init_regs each time we switch
6399 function context. */
6400 if (TARGET_64BIT
6401 && (call_used_regs[SI_REG]
6402 == (cfun->machine->call_abi == MS_ABI)))
6403 reinit_regs ();
6404 }
6405
6406 \f
6407 /* Return true if this goes in large data/bss. */
6408
6409 static bool
6410 ix86_in_large_data_p (tree exp)
6411 {
6412 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6413 return false;
6414
6415 /* Functions are never large data. */
6416 if (TREE_CODE (exp) == FUNCTION_DECL)
6417 return false;
6418
6419 /* Automatic variables are never large data. */
6420 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
6421 return false;
6422
6423 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
6424 {
6425 const char *section = DECL_SECTION_NAME (exp);
6426 if (strcmp (section, ".ldata") == 0
6427 || strcmp (section, ".lbss") == 0)
6428 return true;
6429 return false;
6430 }
6431 else
6432 {
6433 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6434
6435       /* If this is an incomplete type with size 0, then we can't put it
6436 	 in data because it might be too big when completed.  Also,
6437 	 int_size_in_bytes returns -1 if the size can vary or is larger than
6438 	 an integer, in which case it is also safer to assume that it goes in
6439 	 large data.  */
6440 if (size <= 0 || size > ix86_section_threshold)
6441 return true;
6442 }
6443
6444 return false;
6445 }
6446
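/* Illustrative only, not part of GCC: with -mcmodel=medium the predicate
   above sends global objects larger than -mlarge-data-threshold (the
   ix86_section_threshold default is 65536 bytes) to .ldata/.lbss, while
   small objects stay in the normal sections:

     static char small_buf[256];        /* .bss          */
     static char big_buf[1 << 20];      /* .lbss (large) */
   */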
6447 /* Switch to the appropriate section for output of DECL.
6448 DECL is either a `VAR_DECL' node or a constant of some sort.
6449 RELOC indicates whether forming the initial value of DECL requires
6450 link-time relocations. */
6451
6452 ATTRIBUTE_UNUSED static section *
6453 x86_64_elf_select_section (tree decl, int reloc,
6454 unsigned HOST_WIDE_INT align)
6455 {
6456 if (ix86_in_large_data_p (decl))
6457 {
6458 const char *sname = NULL;
6459 unsigned int flags = SECTION_WRITE;
6460 switch (categorize_decl_for_section (decl, reloc))
6461 {
6462 case SECCAT_DATA:
6463 sname = ".ldata";
6464 break;
6465 case SECCAT_DATA_REL:
6466 sname = ".ldata.rel";
6467 break;
6468 case SECCAT_DATA_REL_LOCAL:
6469 sname = ".ldata.rel.local";
6470 break;
6471 case SECCAT_DATA_REL_RO:
6472 sname = ".ldata.rel.ro";
6473 break;
6474 case SECCAT_DATA_REL_RO_LOCAL:
6475 sname = ".ldata.rel.ro.local";
6476 break;
6477 case SECCAT_BSS:
6478 sname = ".lbss";
6479 flags |= SECTION_BSS;
6480 break;
6481 case SECCAT_RODATA:
6482 case SECCAT_RODATA_MERGE_STR:
6483 case SECCAT_RODATA_MERGE_STR_INIT:
6484 case SECCAT_RODATA_MERGE_CONST:
6485 sname = ".lrodata";
6486 flags = 0;
6487 break;
6488 case SECCAT_SRODATA:
6489 case SECCAT_SDATA:
6490 case SECCAT_SBSS:
6491 gcc_unreachable ();
6492 case SECCAT_TEXT:
6493 case SECCAT_TDATA:
6494 case SECCAT_TBSS:
6495 	  /* We don't split these for the medium model.  Place them into
6496 	     default sections and hope for the best.  */
6497 break;
6498 }
6499 if (sname)
6500 {
6501 /* We might get called with string constants, but get_named_section
6502 doesn't like them as they are not DECLs. Also, we need to set
6503 flags in that case. */
6504 if (!DECL_P (decl))
6505 return get_section (sname, flags, NULL);
6506 return get_named_section (decl, sname, reloc);
6507 }
6508 }
6509 return default_elf_select_section (decl, reloc, align);
6510 }
6511
6512 /* Select a set of attributes for section NAME based on the properties
6513 of DECL and whether or not RELOC indicates that DECL's initializer
6514 might contain runtime relocations. */
6515
6516 static unsigned int ATTRIBUTE_UNUSED
6517 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6518 {
6519 unsigned int flags = default_section_type_flags (decl, name, reloc);
6520
6521 if (decl == NULL_TREE
6522 && (strcmp (name, ".ldata.rel.ro") == 0
6523 || strcmp (name, ".ldata.rel.ro.local") == 0))
6524 flags |= SECTION_RELRO;
6525
6526 if (strcmp (name, ".lbss") == 0
6527       || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
6528       || strncmp (name, ".gnu.linkonce.lb.", sizeof (".gnu.linkonce.lb.") - 1) == 0)
6529 flags |= SECTION_BSS;
6530
6531 return flags;
6532 }
6533
6534 /* Build up a unique section name, expressed as a
6535 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6536    RELOC indicates whether the initial value of DECL requires
6537 link-time relocations. */
6538
6539 static void ATTRIBUTE_UNUSED
6540 x86_64_elf_unique_section (tree decl, int reloc)
6541 {
6542 if (ix86_in_large_data_p (decl))
6543 {
6544 const char *prefix = NULL;
6545 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6546 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6547
6548 switch (categorize_decl_for_section (decl, reloc))
6549 {
6550 case SECCAT_DATA:
6551 case SECCAT_DATA_REL:
6552 case SECCAT_DATA_REL_LOCAL:
6553 case SECCAT_DATA_REL_RO:
6554 case SECCAT_DATA_REL_RO_LOCAL:
6555 prefix = one_only ? ".ld" : ".ldata";
6556 break;
6557 case SECCAT_BSS:
6558 prefix = one_only ? ".lb" : ".lbss";
6559 break;
6560 case SECCAT_RODATA:
6561 case SECCAT_RODATA_MERGE_STR:
6562 case SECCAT_RODATA_MERGE_STR_INIT:
6563 case SECCAT_RODATA_MERGE_CONST:
6564 prefix = one_only ? ".lr" : ".lrodata";
6565 break;
6566 case SECCAT_SRODATA:
6567 case SECCAT_SDATA:
6568 case SECCAT_SBSS:
6569 gcc_unreachable ();
6570 case SECCAT_TEXT:
6571 case SECCAT_TDATA:
6572 case SECCAT_TBSS:
6573 	  /* We don't split these for the medium model.  Place them into
6574 	     default sections and hope for the best.  */
6575 break;
6576 }
6577 if (prefix)
6578 {
6579 const char *name, *linkonce;
6580 char *string;
6581
6582 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6583 name = targetm.strip_name_encoding (name);
6584
6585 /* If we're using one_only, then there needs to be a .gnu.linkonce
6586 prefix to the section name. */
6587 linkonce = one_only ? ".gnu.linkonce" : "";
6588
6589 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6590
6591 set_decl_section_name (decl, string);
6592 return;
6593 }
6594 }
6595 default_unique_section (decl, reloc);
6596 }
6597
6598 #ifdef COMMON_ASM_OP
6599 /* This says how to output assembler code to declare an
6600 uninitialized external linkage data object.
6601
6602    For medium model x86-64 we need to use the .largecomm directive for
6603    large objects.  */
6604 void
6605 x86_elf_aligned_common (FILE *file,
6606 const char *name, unsigned HOST_WIDE_INT size,
6607 int align)
6608 {
6609 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6610 && size > (unsigned int)ix86_section_threshold)
6611 fputs ("\t.largecomm\t", file);
6612 else
6613 fputs (COMMON_ASM_OP, file);
6614 assemble_name (file, name);
6615 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6616 size, align / BITS_PER_UNIT);
6617 }
6618 #endif
6619
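/* Illustrative only, not part of GCC: for a common (tentative) definition
   larger than the large-data threshold under -mcmodel=medium, the routine
   above emits ".largecomm name,size,align" instead of the usual ".comm":

     char scratch[1 << 20];	/* no initializer: a common symbol */
   */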
6620 /* Utility function for targets to use in implementing
6621 ASM_OUTPUT_ALIGNED_BSS. */
6622
6623 void
6624 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6625 unsigned HOST_WIDE_INT size, int align)
6626 {
6627 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6628 && size > (unsigned int)ix86_section_threshold)
6629 switch_to_section (get_named_section (decl, ".lbss", 0));
6630 else
6631 switch_to_section (bss_section);
6632 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6633 #ifdef ASM_DECLARE_OBJECT_NAME
6634 last_assemble_variable_decl = decl;
6635 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6636 #else
6637   /* The standard thing is just to output a label for the object.  */
6638 ASM_OUTPUT_LABEL (file, name);
6639 #endif /* ASM_DECLARE_OBJECT_NAME */
6640 ASM_OUTPUT_SKIP (file, size ? size : 1);
6641 }
6642 \f
6643 /* Decide whether we must probe the stack before any space allocation
6644 on this target. It's essentially TARGET_STACK_PROBE except when
6645 -fstack-check causes the stack to be already probed differently. */
6646
6647 bool
6648 ix86_target_stack_probe (void)
6649 {
6650 /* Do not probe the stack twice if static stack checking is enabled. */
6651 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6652 return false;
6653
6654 return TARGET_STACK_PROBE;
6655 }
6656 \f
6657 /* Decide whether we can make a sibling call to a function. DECL is the
6658 declaration of the function being targeted by the call and EXP is the
6659 CALL_EXPR representing the call. */
6660
6661 static bool
6662 ix86_function_ok_for_sibcall (tree decl, tree exp)
6663 {
6664 tree type, decl_or_type;
6665 rtx a, b;
6666 bool bind_global = decl && !targetm.binds_local_p (decl);
6667
6668 /* If we are generating position-independent code, we cannot sibcall
6669 optimize direct calls to global functions, as the PLT requires
6670 %ebx be live. (Darwin does not have a PLT.) */
6671 if (!TARGET_MACHO
6672 && !TARGET_64BIT
6673 && flag_pic
6674 && flag_plt
6675 && bind_global)
6676 return false;
6677
6678 /* If we need to align the outgoing stack, then sibcalling would
6679 unalign the stack, which may break the called function. */
6680 if (ix86_minimum_incoming_stack_boundary (true)
6681 < PREFERRED_STACK_BOUNDARY)
6682 return false;
6683
6684 if (decl)
6685 {
6686 decl_or_type = decl;
6687 type = TREE_TYPE (decl);
6688 }
6689 else
6690 {
6691 /* We're looking at the CALL_EXPR, we need the type of the function. */
6692 type = CALL_EXPR_FN (exp); /* pointer expression */
6693 type = TREE_TYPE (type); /* pointer type */
6694 type = TREE_TYPE (type); /* function type */
6695 decl_or_type = type;
6696 }
6697
6698   /* Check that the return value locations are the same.  For example,
6699      if we are returning floats on the 80387 register stack, we cannot
6700 make a sibcall from a function that doesn't return a float to a
6701 function that does or, conversely, from a function that does return
6702 a float to a function that doesn't; the necessary stack adjustment
6703 would not be executed. This is also the place we notice
6704 differences in the return value ABI. Note that it is ok for one
6705 of the functions to have void return type as long as the return
6706 value of the other is passed in a register. */
6707 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6708 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6709 cfun->decl, false);
6710 if (STACK_REG_P (a) || STACK_REG_P (b))
6711 {
6712 if (!rtx_equal_p (a, b))
6713 return false;
6714 }
6715 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6716 ;
6717 else if (!rtx_equal_p (a, b))
6718 return false;
6719
6720 if (TARGET_64BIT)
6721 {
6722 /* The SYSV ABI has more call-clobbered registers;
6723 disallow sibcalls from MS to SYSV. */
6724 if (cfun->machine->call_abi == MS_ABI
6725 && ix86_function_type_abi (type) == SYSV_ABI)
6726 return false;
6727 }
6728 else
6729 {
6730 /* If this call is indirect, we'll need to be able to use a
6731 call-clobbered register for the address of the target function.
6732 Make sure that all such registers are not used for passing
6733 	 parameters.  Note that DLLIMPORT functions and calls to global
6734 	 functions via the GOT slot are indirect.  */
6735 if (!decl
6736 || (bind_global && flag_pic && !flag_plt)
6737 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6738 {
6739 /* Check if regparm >= 3 since arg_reg_available is set to
6740 false if regparm == 0. If regparm is 1 or 2, there is
6741 always a call-clobbered register available.
6742
6743 ??? The symbol indirect call doesn't need a call-clobbered
6744 register. But we don't know if this is a symbol indirect
6745 call or not here. */
6746 if (ix86_function_regparm (type, NULL) >= 3
6747 && !cfun->machine->arg_reg_available)
6748 return false;
6749 }
6750 }
6751
6752 /* Otherwise okay. That also includes certain types of indirect calls. */
6753 return true;
6754 }
6755
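/* Illustrative only, not part of GCC (the function names are made up): a
   32-bit -fpic example for the sibcall test above.  The tail call to the
   global function extlib_work would normally go through the PLT, which
   needs %ebx live, so it is not turned into a sibcall; a tail call to a
   local (static) function may be:

     extern int extlib_work (int);
     static int local_work (int x) { return x * 2; }

     int wrap_ext (int x) { return extlib_work (x); }  /* plain call   */
     int wrap_loc (int x) { return local_work (x); }   /* may sibcall  */
   */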
6756 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6757 and "sseregparm" calling convention attributes;
6758 arguments as in struct attribute_spec.handler. */
6759
6760 static tree
6761 ix86_handle_cconv_attribute (tree *node, tree name,
6762 tree args,
6763 int,
6764 bool *no_add_attrs)
6765 {
6766 if (TREE_CODE (*node) != FUNCTION_TYPE
6767 && TREE_CODE (*node) != METHOD_TYPE
6768 && TREE_CODE (*node) != FIELD_DECL
6769 && TREE_CODE (*node) != TYPE_DECL)
6770 {
6771 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6772 name);
6773 *no_add_attrs = true;
6774 return NULL_TREE;
6775 }
6776
6777 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6778 if (is_attribute_p ("regparm", name))
6779 {
6780 tree cst;
6781
6782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6783 {
6784 error ("fastcall and regparm attributes are not compatible");
6785 }
6786
6787 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6788 {
6789 	  error ("regparm and thiscall attributes are not compatible");
6790 }
6791
6792 cst = TREE_VALUE (args);
6793 if (TREE_CODE (cst) != INTEGER_CST)
6794 {
6795 warning (OPT_Wattributes,
6796 "%qE attribute requires an integer constant argument",
6797 name);
6798 *no_add_attrs = true;
6799 }
6800 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6801 {
6802 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6803 name, REGPARM_MAX);
6804 *no_add_attrs = true;
6805 }
6806
6807 return NULL_TREE;
6808 }
6809
6810 if (TARGET_64BIT)
6811 {
6812 /* Do not warn when emulating the MS ABI. */
6813 if ((TREE_CODE (*node) != FUNCTION_TYPE
6814 && TREE_CODE (*node) != METHOD_TYPE)
6815 || ix86_function_type_abi (*node) != MS_ABI)
6816 warning (OPT_Wattributes, "%qE attribute ignored",
6817 name);
6818 *no_add_attrs = true;
6819 return NULL_TREE;
6820 }
6821
6822 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6823 if (is_attribute_p ("fastcall", name))
6824 {
6825 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6826 {
6827 error ("fastcall and cdecl attributes are not compatible");
6828 }
6829 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6830 {
6831 error ("fastcall and stdcall attributes are not compatible");
6832 }
6833 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6834 {
6835 error ("fastcall and regparm attributes are not compatible");
6836 }
6837 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6838 {
6839 error ("fastcall and thiscall attributes are not compatible");
6840 }
6841 }
6842
6843 /* Can combine stdcall with fastcall (redundant), regparm and
6844 sseregparm. */
6845 else if (is_attribute_p ("stdcall", name))
6846 {
6847 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6848 {
6849 error ("stdcall and cdecl attributes are not compatible");
6850 }
6851 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6852 {
6853 error ("stdcall and fastcall attributes are not compatible");
6854 }
6855 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6856 {
6857 error ("stdcall and thiscall attributes are not compatible");
6858 }
6859 }
6860
6861 /* Can combine cdecl with regparm and sseregparm. */
6862 else if (is_attribute_p ("cdecl", name))
6863 {
6864 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6865 {
6866 error ("stdcall and cdecl attributes are not compatible");
6867 }
6868 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6869 {
6870 error ("fastcall and cdecl attributes are not compatible");
6871 }
6872 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6873 {
6874 error ("cdecl and thiscall attributes are not compatible");
6875 }
6876 }
6877 else if (is_attribute_p ("thiscall", name))
6878 {
6879 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6880 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6881 name);
6882 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6883 {
6884 error ("stdcall and thiscall attributes are not compatible");
6885 }
6886 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6887 {
6888 error ("fastcall and thiscall attributes are not compatible");
6889 }
6890 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6891 {
6892 error ("cdecl and thiscall attributes are not compatible");
6893 }
6894 }
6895
6896 /* Can combine sseregparm with all attributes. */
6897
6898 return NULL_TREE;
6899 }
6900
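/* Illustrative only, not part of GCC: 32-bit declarations that exercise the
   attribute checks above (on 64-bit these attributes are ignored with a
   warning).  The first combination is accepted; the second triggers the
   "fastcall and regparm attributes are not compatible" error:

     int __attribute__ ((stdcall, regparm (2))) ok_fn (int, int);
     int __attribute__ ((fastcall, regparm (2))) bad_fn (int, int);
   */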
6901 /* The transactional memory builtins are implicitly regparm or fastcall
6902 depending on the ABI. Override the generic do-nothing attribute that
6903 these builtins were declared with, and replace it with one of the two
6904 attributes that we expect elsewhere. */
6905
6906 static tree
6907 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6908 int flags, bool *no_add_attrs)
6909 {
6910 tree alt;
6911
6912 /* In no case do we want to add the placeholder attribute. */
6913 *no_add_attrs = true;
6914
6915 /* The 64-bit ABI is unchanged for transactional memory. */
6916 if (TARGET_64BIT)
6917 return NULL_TREE;
6918
6919   /* ??? Is there a better way to validate 32-bit Windows?  We have
6920 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
6921 if (CHECK_STACK_LIMIT > 0)
6922 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6923 else
6924 {
6925 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6926 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6927 }
6928 decl_attributes (node, alt, flags);
6929
6930 return NULL_TREE;
6931 }
6932
6933 /* This function determines from TYPE the calling-convention. */
6934
6935 unsigned int
6936 ix86_get_callcvt (const_tree type)
6937 {
6938 unsigned int ret = 0;
6939 bool is_stdarg;
6940 tree attrs;
6941
6942 if (TARGET_64BIT)
6943 return IX86_CALLCVT_CDECL;
6944
6945 attrs = TYPE_ATTRIBUTES (type);
6946 if (attrs != NULL_TREE)
6947 {
6948 if (lookup_attribute ("cdecl", attrs))
6949 ret |= IX86_CALLCVT_CDECL;
6950 else if (lookup_attribute ("stdcall", attrs))
6951 ret |= IX86_CALLCVT_STDCALL;
6952 else if (lookup_attribute ("fastcall", attrs))
6953 ret |= IX86_CALLCVT_FASTCALL;
6954 else if (lookup_attribute ("thiscall", attrs))
6955 ret |= IX86_CALLCVT_THISCALL;
6956
6957 	  /* Regparm isn't allowed for thiscall and fastcall.  */
6958 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6959 {
6960 if (lookup_attribute ("regparm", attrs))
6961 ret |= IX86_CALLCVT_REGPARM;
6962 if (lookup_attribute ("sseregparm", attrs))
6963 ret |= IX86_CALLCVT_SSEREGPARM;
6964 }
6965
6966 if (IX86_BASE_CALLCVT(ret) != 0)
6967 return ret;
6968 }
6969
6970 is_stdarg = stdarg_p (type);
6971 if (TARGET_RTD && !is_stdarg)
6972 return IX86_CALLCVT_STDCALL | ret;
6973
6974 if (ret != 0
6975 || is_stdarg
6976 || TREE_CODE (type) != METHOD_TYPE
6977 || ix86_function_type_abi (type) != MS_ABI)
6978 return IX86_CALLCVT_CDECL | ret;
6979
6980 return IX86_CALLCVT_THISCALL;
6981 }
6982
6983 /* Return 0 if the attributes for two types are incompatible, 1 if they
6984 are compatible, and 2 if they are nearly compatible (which causes a
6985 warning to be generated). */
6986
6987 static int
6988 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6989 {
6990 unsigned int ccvt1, ccvt2;
6991
6992 if (TREE_CODE (type1) != FUNCTION_TYPE
6993 && TREE_CODE (type1) != METHOD_TYPE)
6994 return 1;
6995
6996 ccvt1 = ix86_get_callcvt (type1);
6997 ccvt2 = ix86_get_callcvt (type2);
6998 if (ccvt1 != ccvt2)
6999 return 0;
7000 if (ix86_function_regparm (type1, NULL)
7001 != ix86_function_regparm (type2, NULL))
7002 return 0;
7003
7004 return 1;
7005 }
7006 \f
7007 /* Return the regparm value for a function with the indicated TYPE and DECL.
7008 DECL may be NULL when calling function indirectly
7009 or considering a libcall. */
7010
7011 static int
7012 ix86_function_regparm (const_tree type, const_tree decl)
7013 {
7014 tree attr;
7015 int regparm;
7016 unsigned int ccvt;
7017
7018 if (TARGET_64BIT)
7019 return (ix86_function_type_abi (type) == SYSV_ABI
7020 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
7021 ccvt = ix86_get_callcvt (type);
7022 regparm = ix86_regparm;
7023
7024 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
7025 {
7026 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
7027 if (attr)
7028 {
7029 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
7030 return regparm;
7031 }
7032 }
7033 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7034 return 2;
7035 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7036 return 1;
7037
7038 /* Use register calling convention for local functions when possible. */
7039 if (decl
7040 && TREE_CODE (decl) == FUNCTION_DECL)
7041 {
7042 cgraph_node *target = cgraph_node::get (decl);
7043 if (target)
7044 target = target->function_symbol ();
7045
7046       /* Caller and callee must agree on the calling convention, so
7047 	 checking just the current function's optimize setting here would mean
7048 	 that with __attribute__((optimize (...))) the caller could use the
7049 	 regparm convention and the callee not, or vice versa.  Instead look
7050 	 at whether the callee is optimized or not.  */
7051 if (target && opt_for_fn (target->decl, optimize)
7052 && !(profile_flag && !flag_fentry))
7053 {
7054 cgraph_local_info *i = &target->local;
7055 if (i && i->local && i->can_change_signature)
7056 {
7057 int local_regparm, globals = 0, regno;
7058
7059 /* Make sure no regparm register is taken by a
7060 fixed register variable. */
7061 for (local_regparm = 0; local_regparm < REGPARM_MAX;
7062 local_regparm++)
7063 if (fixed_regs[local_regparm])
7064 break;
7065
7066 /* We don't want to use regparm(3) for nested functions as
7067 these use a static chain pointer in the third argument. */
7068 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
7069 local_regparm = 2;
7070
7071 /* Save a register for the split stack. */
7072 if (local_regparm == 3 && flag_split_stack)
7073 local_regparm = 2;
7074
7075 /* Each fixed register usage increases register pressure,
7076 		 so fewer registers should be used for argument passing.
7077 		 This functionality can be overridden by an explicit
7078 regparm value. */
7079 for (regno = AX_REG; regno <= DI_REG; regno++)
7080 if (fixed_regs[regno])
7081 globals++;
7082
7083 local_regparm
7084 = globals < local_regparm ? local_regparm - globals : 0;
7085
7086 if (local_regparm > regparm)
7087 regparm = local_regparm;
7088 }
7089 }
7090 }
7091
7092 return regparm;
7093 }
7094
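/* Illustrative only, not part of GCC: the local-function promotion done
   above.  On 32-bit, when optimizing, a static function whose callers are
   all known may get its regparm value raised so its arguments travel in
   %eax/%edx/%ecx without any explicit attribute:

     static int add3 (int a, int b, int c) { return a + b + c; }

   An explicit __attribute__ ((regparm (N))) always takes precedence.  */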
7095 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
7096 DFmode (2) arguments in SSE registers for a function with the
7097    indicated TYPE and DECL.  DECL may be NULL when calling a function
7098    indirectly or considering a libcall.  Return -1 if any FP parameter
7099    should be rejected by error.  This is used in situations where we imply
7100    the SSE calling convention but the function is called from another
7101    function with SSE disabled.  Otherwise return 0.  */
7102
7103 static int
7104 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
7105 {
7106 gcc_assert (!TARGET_64BIT);
7107
7108 /* Use SSE registers to pass SFmode and DFmode arguments if requested
7109 by the sseregparm attribute. */
7110 if (TARGET_SSEREGPARM
7111 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
7112 {
7113 if (!TARGET_SSE)
7114 {
7115 if (warn)
7116 {
7117 if (decl)
7118 error ("calling %qD with attribute sseregparm without "
7119 "SSE/SSE2 enabled", decl);
7120 else
7121 error ("calling %qT with attribute sseregparm without "
7122 "SSE/SSE2 enabled", type);
7123 }
7124 return 0;
7125 }
7126
7127 return 2;
7128 }
7129
7130 if (!decl)
7131 return 0;
7132
7133 cgraph_node *target = cgraph_node::get (decl);
7134 if (target)
7135 target = target->function_symbol ();
7136
7137 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
7138 (and DFmode for SSE2) arguments in SSE registers. */
7139 if (target
7140 /* TARGET_SSE_MATH */
7141 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
7142 && opt_for_fn (target->decl, optimize)
7143 && !(profile_flag && !flag_fentry))
7144 {
7145 cgraph_local_info *i = &target->local;
7146 if (i && i->local && i->can_change_signature)
7147 {
7148 /* Refuse to produce wrong code when local function with SSE enabled
7149 is called from SSE disabled function.
7150 FIXME: We need a way to detect these cases cross-ltrans partition
7151 and avoid using SSE calling conventions on local functions called
7152 from function with SSE disabled. For now at least delay the
7153 warning until we know we are going to produce wrong code.
7154 See PR66047 */
7155 if (!TARGET_SSE && warn)
7156 return -1;
7157 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
7158 ->x_ix86_isa_flags) ? 2 : 1;
7159 }
7160 }
7161
7162 return 0;
7163 }
7164
7165 /* Return true if EAX is live at the start of the function. Used by
7166 ix86_expand_prologue to determine if we need special help before
7167 calling allocate_stack_worker. */
7168
7169 static bool
7170 ix86_eax_live_at_start_p (void)
7171 {
7172 /* Cheat. Don't bother working forward from ix86_function_regparm
7173 to the function type to whether an actual argument is located in
7174 eax. Instead just look at cfg info, which is still close enough
7175 to correct at this point. This gives false positives for broken
7176 functions that might use uninitialized data that happens to be
7177 allocated in eax, but who cares? */
7178 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
7179 }
7180
7181 static bool
7182 ix86_keep_aggregate_return_pointer (tree fntype)
7183 {
7184 tree attr;
7185
7186 if (!TARGET_64BIT)
7187 {
7188 attr = lookup_attribute ("callee_pop_aggregate_return",
7189 TYPE_ATTRIBUTES (fntype));
7190 if (attr)
7191 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
7192
7193 /* For 32-bit MS-ABI the default is to keep aggregate
7194 return pointer. */
7195 if (ix86_function_type_abi (fntype) == MS_ABI)
7196 return true;
7197 }
7198 return KEEP_AGGREGATE_RETURN_POINTER != 0;
7199 }
7200
7201 /* Value is the number of bytes of arguments automatically
7202 popped when returning from a subroutine call.
7203 FUNDECL is the declaration node of the function (as a tree),
7204 FUNTYPE is the data type of the function (as a tree),
7205 or for a library call it is an identifier node for the subroutine name.
7206 SIZE is the number of bytes of arguments passed on the stack.
7207
7208 On the 80386, the RTD insn may be used to pop them if the number
7209 of args is fixed, but if the number is variable then the caller
7210 must pop them all. RTD can't be used for library calls now
7211 because the library is compiled with the Unix compiler.
7212 Use of RTD is a selectable option, since it is incompatible with
7213 standard Unix calling sequences. If the option is not selected,
7214 the caller must always pop the args.
7215
7216 The attribute stdcall is equivalent to RTD on a per module basis. */
7217
7218 static int
7219 ix86_return_pops_args (tree fundecl, tree funtype, int size)
7220 {
7221 unsigned int ccvt;
7222
7223 /* None of the 64-bit ABIs pop arguments. */
7224 if (TARGET_64BIT)
7225 return 0;
7226
7227 ccvt = ix86_get_callcvt (funtype);
7228
7229 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
7230 | IX86_CALLCVT_THISCALL)) != 0
7231 && ! stdarg_p (funtype))
7232 return size;
7233
7234 /* Lose any fake structure return argument if it is passed on the stack. */
7235 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
7236 && !ix86_keep_aggregate_return_pointer (funtype))
7237 {
7238 int nregs = ix86_function_regparm (funtype, fundecl);
7239 if (nregs == 0)
7240 return GET_MODE_SIZE (Pmode);
7241 }
7242
7243 return 0;
7244 }
7245
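/* Illustrative only, not part of GCC: the callee-pops rule above at work.
   A 32-bit stdcall function with a fixed argument list pops its own
   arguments on return, whereas the default cdecl convention leaves them for
   the caller to remove:

     int __attribute__ ((stdcall)) s (int a, int b);  /* SIZE == 8 popped */
     int c (int a, int b);                            /* 0 bytes popped   */
   */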
7246 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
7247
7248 static bool
7249 ix86_legitimate_combined_insn (rtx_insn *insn)
7250 {
7251 /* Check operand constraints in case hard registers were propagated
7252 into insn pattern. This check prevents combine pass from
7253 generating insn patterns with invalid hard register operands.
7254 These invalid insns can eventually confuse reload to error out
7255 with a spill failure. See also PRs 46829 and 46843. */
7256 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
7257 {
7258 int i;
7259
7260 extract_insn (insn);
7261 preprocess_constraints (insn);
7262
7263 int n_operands = recog_data.n_operands;
7264 int n_alternatives = recog_data.n_alternatives;
7265 for (i = 0; i < n_operands; i++)
7266 {
7267 rtx op = recog_data.operand[i];
7268 machine_mode mode = GET_MODE (op);
7269 const operand_alternative *op_alt;
7270 int offset = 0;
7271 bool win;
7272 int j;
7273
7274 /* For pre-AVX disallow unaligned loads/stores where the
7275 instructions don't support it. */
7276 if (!TARGET_AVX
7277 && VECTOR_MODE_P (mode)
7278 && misaligned_operand (op, mode))
7279 {
7280 unsigned int min_align = get_attr_ssememalign (insn);
7281 if (min_align == 0
7282 || MEM_ALIGN (op) < min_align)
7283 return false;
7284 }
7285
7286 /* A unary operator may be accepted by the predicate, but it
7287 is irrelevant for matching constraints. */
7288 if (UNARY_P (op))
7289 op = XEXP (op, 0);
7290
7291 if (SUBREG_P (op))
7292 {
7293 if (REG_P (SUBREG_REG (op))
7294 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
7295 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
7296 GET_MODE (SUBREG_REG (op)),
7297 SUBREG_BYTE (op),
7298 GET_MODE (op));
7299 op = SUBREG_REG (op);
7300 }
7301
7302 if (!(REG_P (op) && HARD_REGISTER_P (op)))
7303 continue;
7304
7305 op_alt = recog_op_alt;
7306
7307 /* Operand has no constraints, anything is OK. */
7308 win = !n_alternatives;
7309
7310 alternative_mask preferred = get_preferred_alternatives (insn);
7311 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7312 {
7313 if (!TEST_BIT (preferred, j))
7314 continue;
7315 if (op_alt[i].anything_ok
7316 || (op_alt[i].matches != -1
7317 && operands_match_p
7318 (recog_data.operand[i],
7319 recog_data.operand[op_alt[i].matches]))
7320 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7321 {
7322 win = true;
7323 break;
7324 }
7325 }
7326
7327 if (!win)
7328 return false;
7329 }
7330 }
7331
7332 return true;
7333 }
7334 \f
7335 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
7336
7337 static unsigned HOST_WIDE_INT
7338 ix86_asan_shadow_offset (void)
7339 {
7340 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7341 : HOST_WIDE_INT_C (0x7fff8000))
7342 : (HOST_WIDE_INT_1 << 29);
7343 }
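/* Illustrative only, not part of GCC: how the offset above is consumed.
   AddressSanitizer maps every 8 application bytes to 1 shadow byte, so the
   instrumentation computes roughly

     shadow_addr = (addr >> 3) + ix86_asan_shadow_offset ();

   i.e. the offset is 0x7fff8000 for LP64 (1 << 44 on Mach-O) and 1 << 29
   for 32-bit.  */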
7344 \f
7345 /* Argument support functions. */
7346
7347 /* Return true when register may be used to pass function parameters. */
7348 bool
7349 ix86_function_arg_regno_p (int regno)
7350 {
7351 int i;
7352 enum calling_abi call_abi;
7353 const int *parm_regs;
7354
7355 if (TARGET_MPX && BND_REGNO_P (regno))
7356 return true;
7357
7358 if (!TARGET_64BIT)
7359 {
7360 if (TARGET_MACHO)
7361 return (regno < REGPARM_MAX
7362 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7363 else
7364 return (regno < REGPARM_MAX
7365 || (TARGET_MMX && MMX_REGNO_P (regno)
7366 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7367 || (TARGET_SSE && SSE_REGNO_P (regno)
7368 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7369 }
7370
7371 if (TARGET_SSE && SSE_REGNO_P (regno)
7372 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7373 return true;
7374
7375 /* TODO: The function should depend on current function ABI but
7376 builtins.c would need updating then. Therefore we use the
7377 default ABI. */
7378 call_abi = ix86_cfun_abi ();
7379
7380 /* RAX is used as hidden argument to va_arg functions. */
7381 if (call_abi == SYSV_ABI && regno == AX_REG)
7382 return true;
7383
7384 if (call_abi == MS_ABI)
7385 parm_regs = x86_64_ms_abi_int_parameter_registers;
7386 else
7387 parm_regs = x86_64_int_parameter_registers;
7388
7389 for (i = 0; i < (call_abi == MS_ABI
7390 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7391 if (regno == parm_regs[i])
7392 return true;
7393 return false;
7394 }
7395
7396 /* Return true if we do not know how to pass TYPE solely in registers.  */
7397
7398 static bool
7399 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
7400 {
7401 if (must_pass_in_stack_var_size_or_pad (mode, type))
7402 return true;
7403
7404 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7405 The layout_type routine is crafty and tries to trick us into passing
7406 currently unsupported vector types on the stack by using TImode. */
7407 return (!TARGET_64BIT && mode == TImode
7408 && type && TREE_CODE (type) != VECTOR_TYPE);
7409 }
7410
7411 /* Return the size, in bytes, of the area reserved for arguments passed
7412    in registers for the function represented by FNDECL, depending on the
7413    ABI used.  */
7414 int
7415 ix86_reg_parm_stack_space (const_tree fndecl)
7416 {
7417 enum calling_abi call_abi = SYSV_ABI;
7418 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7419 call_abi = ix86_function_abi (fndecl);
7420 else
7421 call_abi = ix86_function_type_abi (fndecl);
7422 if (TARGET_64BIT && call_abi == MS_ABI)
7423 return 32;
7424 return 0;
7425 }
7426
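/* Illustrative only, not part of GCC: the 32 bytes returned above for the
   64-bit MS ABI are the "shadow space" a caller must reserve on the stack
   for the four register-passed arguments, even for a callee that takes
   fewer parameters:

     int __attribute__ ((ms_abi)) cb (int a);   /* still gets 32 bytes */
   */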
7427 /* We add this as a workaround in order to use libc_has_function
7428 hook in i386.md. */
7429 bool
7430 ix86_libc_has_function (enum function_class fn_class)
7431 {
7432 return targetm.libc_has_function (fn_class);
7433 }
7434
7435 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
7436 specifying the call abi used. */
7437 enum calling_abi
7438 ix86_function_type_abi (const_tree fntype)
7439 {
7440 enum calling_abi abi = ix86_abi;
7441
7442 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
7443 return abi;
7444
7445 if (abi == SYSV_ABI
7446 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7447 {
7448 if (TARGET_X32)
7449 error ("X32 does not support ms_abi attribute");
7450
7451 abi = MS_ABI;
7452 }
7453 else if (abi == MS_ABI
7454 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
7455 abi = SYSV_ABI;
7456
7457 return abi;
7458 }
7459
7460 static enum calling_abi
7461 ix86_function_abi (const_tree fndecl)
7462 {
7463 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7464 }
7465
7466 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
7467 specifying the call abi used. */
7468 enum calling_abi
7469 ix86_cfun_abi (void)
7470 {
7471 return cfun ? cfun->machine->call_abi : ix86_abi;
7472 }
7473
7474 static bool
7475 ix86_function_ms_hook_prologue (const_tree fn)
7476 {
7477 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7478 {
7479 if (decl_function_context (fn) != NULL_TREE)
7480 error_at (DECL_SOURCE_LOCATION (fn),
7481 "ms_hook_prologue is not compatible with nested function");
7482 else
7483 return true;
7484 }
7485 return false;
7486 }
7487
7488 /* Write the extra assembler code needed to declare a function properly. */
7489
7490 void
7491 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7492 tree decl)
7493 {
7494 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
7495
7496 if (is_ms_hook)
7497 {
7498 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7499 unsigned int filler_cc = 0xcccccccc;
7500
7501 for (i = 0; i < filler_count; i += 4)
7502 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7503 }
7504
7505 #ifdef SUBTARGET_ASM_UNWIND_INIT
7506 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7507 #endif
7508
7509 ASM_OUTPUT_LABEL (asm_out_file, fname);
7510
7511 /* Output magic byte marker, if hot-patch attribute is set. */
7512 if (is_ms_hook)
7513 {
7514 if (TARGET_64BIT)
7515 {
7516 /* leaq [%rsp + 0], %rsp */
7517 asm_fprintf (asm_out_file, ASM_BYTE
7518 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
7519 }
7520 else
7521 {
7522 /* movl.s %edi, %edi
7523 push %ebp
7524 movl.s %esp, %ebp */
7525 asm_fprintf (asm_out_file, ASM_BYTE
7526 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
7527 }
7528 }
7529 }
7530
7531 /* regclass.c */
7532 extern void init_regs (void);
7533
7534 /* Implementation of call abi switching target hook. Specific to FNDECL
7535 the specific call register sets are set. See also
7536 ix86_conditional_register_usage for more details. */
7537 void
7538 ix86_call_abi_override (const_tree fndecl)
7539 {
7540 cfun->machine->call_abi = ix86_function_abi (fndecl);
7541 }
7542
7543 /* Return 1 if pseudo register should be created and used to hold
7544 GOT address for PIC code. */
7545 bool
7546 ix86_use_pseudo_pic_reg (void)
7547 {
7548 if ((TARGET_64BIT
7549 && (ix86_cmodel == CM_SMALL_PIC
7550 || TARGET_PECOFF))
7551 || !flag_pic)
7552 return false;
7553 return true;
7554 }
7555
7556 /* Initialize large model PIC register. */
7557
7558 static void
7559 ix86_init_large_pic_reg (unsigned int tmp_regno)
7560 {
7561 rtx_code_label *label;
7562 rtx tmp_reg;
7563
7564 gcc_assert (Pmode == DImode);
7565 label = gen_label_rtx ();
7566 emit_label (label);
7567 LABEL_PRESERVE_P (label) = 1;
7568 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7569 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7570 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7571 label));
7572 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7573 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7574 pic_offset_table_rtx, tmp_reg));
7575 }
7576
7577 /* Create and initialize PIC register if required. */
7578 static void
7579 ix86_init_pic_reg (void)
7580 {
7581 edge entry_edge;
7582 rtx_insn *seq;
7583
7584 if (!ix86_use_pseudo_pic_reg ())
7585 return;
7586
7587 start_sequence ();
7588
7589 if (TARGET_64BIT)
7590 {
7591 if (ix86_cmodel == CM_LARGE_PIC)
7592 ix86_init_large_pic_reg (R11_REG);
7593 else
7594 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7595 }
7596 else
7597 {
7598       /* If there is a future mcount call in the function, it is more profitable
7599 	 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
7600 rtx reg = crtl->profile
7601 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7602 : pic_offset_table_rtx;
7603 rtx_insn *insn = emit_insn (gen_set_got (reg));
7604 RTX_FRAME_RELATED_P (insn) = 1;
7605 if (crtl->profile)
7606 emit_move_insn (pic_offset_table_rtx, reg);
7607 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7608 }
7609
7610 seq = get_insns ();
7611 end_sequence ();
7612
7613 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7614 insert_insn_on_edge (seq, entry_edge);
7615 commit_one_edge_insertion (entry_edge);
7616 }
7617
7618 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7619 for a call to a function whose data type is FNTYPE.
7620 For a library call, FNTYPE is 0. */
7621
7622 void
7623 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7624 tree fntype, /* tree ptr for function decl */
7625 rtx libname, /* SYMBOL_REF of library name or 0 */
7626 tree fndecl,
7627 int caller)
7628 {
7629 struct cgraph_local_info *i = NULL;
7630 struct cgraph_node *target = NULL;
7631
7632 memset (cum, 0, sizeof (*cum));
7633
7634 if (fndecl)
7635 {
7636 target = cgraph_node::get (fndecl);
7637 if (target)
7638 {
7639 target = target->function_symbol ();
7640 i = cgraph_node::local_info (target->decl);
7641 cum->call_abi = ix86_function_abi (target->decl);
7642 }
7643 else
7644 cum->call_abi = ix86_function_abi (fndecl);
7645 }
7646 else
7647 cum->call_abi = ix86_function_type_abi (fntype);
7648
7649 cum->caller = caller;
7650
7651 /* Set up the number of registers to use for passing arguments. */
7652 cum->nregs = ix86_regparm;
7653 if (TARGET_64BIT)
7654 {
7655 cum->nregs = (cum->call_abi == SYSV_ABI
7656 ? X86_64_REGPARM_MAX
7657 : X86_64_MS_REGPARM_MAX);
7658 }
7659 if (TARGET_SSE)
7660 {
7661 cum->sse_nregs = SSE_REGPARM_MAX;
7662 if (TARGET_64BIT)
7663 {
7664 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7665 ? X86_64_SSE_REGPARM_MAX
7666 : X86_64_MS_SSE_REGPARM_MAX);
7667 }
7668 }
7669 if (TARGET_MMX)
7670 cum->mmx_nregs = MMX_REGPARM_MAX;
7671 cum->warn_avx512f = true;
7672 cum->warn_avx = true;
7673 cum->warn_sse = true;
7674 cum->warn_mmx = true;
7675
7676   /* Because types might mismatch between caller and callee, we need to
7677      use the actual type of the function for local calls.
7678      FIXME: cgraph_analyze can be told to actually record if a function uses
7679      va_start, so for local functions maybe_vaarg can be made aggressive,
7680      helping K&R code.
7681      FIXME: once the type system is fixed, we won't need this code anymore.  */
7682 if (i && i->local && i->can_change_signature)
7683 fntype = TREE_TYPE (target->decl);
7684 cum->stdarg = stdarg_p (fntype);
7685 cum->maybe_vaarg = (fntype
7686 ? (!prototype_p (fntype) || stdarg_p (fntype))
7687 : !libname);
7688
7689 cum->bnd_regno = FIRST_BND_REG;
7690 cum->bnds_in_bt = 0;
7691 cum->force_bnd_pass = 0;
7692 cum->decl = fndecl;
7693
7694 if (!TARGET_64BIT)
7695 {
7696 /* If there are variable arguments, then we won't pass anything
7697 in registers in 32-bit mode. */
7698 if (stdarg_p (fntype))
7699 {
7700 cum->nregs = 0;
7701 /* Since in 32-bit, variable arguments are always passed on
7702 stack, there is scratch register available for indirect
7703 sibcall. */
7704 cfun->machine->arg_reg_available = true;
7705 cum->sse_nregs = 0;
7706 cum->mmx_nregs = 0;
7707 cum->warn_avx512f = false;
7708 cum->warn_avx = false;
7709 cum->warn_sse = false;
7710 cum->warn_mmx = false;
7711 return;
7712 }
7713
7714 /* Use ecx and edx registers if function has fastcall attribute,
7715 else look for regparm information. */
7716 if (fntype)
7717 {
7718 unsigned int ccvt = ix86_get_callcvt (fntype);
7719 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7720 {
7721 cum->nregs = 1;
7722 cum->fastcall = 1; /* Same first register as in fastcall. */
7723 }
7724 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7725 {
7726 cum->nregs = 2;
7727 cum->fastcall = 1;
7728 }
7729 else
7730 cum->nregs = ix86_function_regparm (fntype, fndecl);
7731 }
7732
7733 /* Set up the number of SSE registers used for passing SFmode
7734 and DFmode arguments. Warn for mismatching ABI. */
7735 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7736 }
7737
7738 cfun->machine->arg_reg_available = (cum->nregs > 0);
7739 }
7740
7741 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7742 But in the case of vector types, it is some vector mode.
7743
7744 When we have only some of our vector isa extensions enabled, then there
7745 are some modes for which vector_mode_supported_p is false. For these
7746 modes, the generic vector support in gcc will choose some non-vector mode
7747 in order to implement the type. By computing the natural mode, we'll
7748 select the proper ABI location for the operand and not depend on whatever
7749 the middle-end decides to do with these vector types.
7750
7751    The middle-end can't deal with vector types > 16 bytes.  In this
7752    case, we return the original mode and warn about the ABI change if
7753    CUM isn't NULL.
7754 
7755    If IN_RETURN is true, warn about the ABI change if the vector mode
7756    isn't available for the function return value.  */
7757
7758 static machine_mode
7759 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7760 bool in_return)
7761 {
7762 machine_mode mode = TYPE_MODE (type);
7763
7764 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7765 {
7766 HOST_WIDE_INT size = int_size_in_bytes (type);
7767 if ((size == 8 || size == 16 || size == 32 || size == 64)
7768 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7769 && TYPE_VECTOR_SUBPARTS (type) > 1)
7770 {
7771 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7772
7773 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7774 mode = MIN_MODE_VECTOR_FLOAT;
7775 else
7776 mode = MIN_MODE_VECTOR_INT;
7777
7778 /* Get the mode which has this inner mode and number of units. */
7779 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
7780 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7781 && GET_MODE_INNER (mode) == innermode)
7782 {
7783 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7784 {
7785 static bool warnedavx512f;
7786 static bool warnedavx512f_ret;
7787
7788 if (cum && cum->warn_avx512f && !warnedavx512f)
7789 {
7790 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7791 "without AVX512F enabled changes the ABI"))
7792 warnedavx512f = true;
7793 }
7794 else if (in_return && !warnedavx512f_ret)
7795 {
7796 if (warning (OPT_Wpsabi, "AVX512F vector return "
7797 "without AVX512F enabled changes the ABI"))
7798 warnedavx512f_ret = true;
7799 }
7800
7801 return TYPE_MODE (type);
7802 }
7803 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7804 {
7805 static bool warnedavx;
7806 static bool warnedavx_ret;
7807
7808 if (cum && cum->warn_avx && !warnedavx)
7809 {
7810 if (warning (OPT_Wpsabi, "AVX vector argument "
7811 "without AVX enabled changes the ABI"))
7812 warnedavx = true;
7813 }
7814 else if (in_return && !warnedavx_ret)
7815 {
7816 if (warning (OPT_Wpsabi, "AVX vector return "
7817 "without AVX enabled changes the ABI"))
7818 warnedavx_ret = true;
7819 }
7820
7821 return TYPE_MODE (type);
7822 }
7823 else if (((size == 8 && TARGET_64BIT) || size == 16)
7824 && !TARGET_SSE
7825 && !TARGET_IAMCU)
7826 {
7827 static bool warnedsse;
7828 static bool warnedsse_ret;
7829
7830 if (cum && cum->warn_sse && !warnedsse)
7831 {
7832 if (warning (OPT_Wpsabi, "SSE vector argument "
7833 "without SSE enabled changes the ABI"))
7834 warnedsse = true;
7835 }
7836 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7837 {
7838 if (warning (OPT_Wpsabi, "SSE vector return "
7839 "without SSE enabled changes the ABI"))
7840 warnedsse_ret = true;
7841 }
7842 }
7843 else if ((size == 8 && !TARGET_64BIT)
7844 && !TARGET_MMX
7845 && !TARGET_IAMCU)
7846 {
7847 static bool warnedmmx;
7848 static bool warnedmmx_ret;
7849
7850 if (cum && cum->warn_mmx && !warnedmmx)
7851 {
7852 if (warning (OPT_Wpsabi, "MMX vector argument "
7853 "without MMX enabled changes the ABI"))
7854 warnedmmx = true;
7855 }
7856 else if (in_return && !warnedmmx_ret)
7857 {
7858 if (warning (OPT_Wpsabi, "MMX vector return "
7859 "without MMX enabled changes the ABI"))
7860 warnedmmx_ret = true;
7861 }
7862 }
7863 return mode;
7864 }
7865
7866 gcc_unreachable ();
7867 }
7868 }
7869
7870 return mode;
7871 }
7872
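/* Illustrative only, not part of GCC: the -Wpsabi diagnostics above fire
   when a wide vector type is passed while the matching ISA is disabled,
   e.g. compiling this without -mavx:

     typedef int v8si __attribute__ ((vector_size (32)));
     v8si pass_through (v8si x) { return x; }   /* warns: "AVX vector argument
                                                    without AVX enabled changes
                                                    the ABI" */
   */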
7873 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7874 this may not agree with the mode that the type system has chosen for the
7875 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7876 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7877
7878 static rtx
7879 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7880 unsigned int regno)
7881 {
7882 rtx tmp;
7883
7884 if (orig_mode != BLKmode)
7885 tmp = gen_rtx_REG (orig_mode, regno);
7886 else
7887 {
7888 tmp = gen_rtx_REG (mode, regno);
7889 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7890 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7891 }
7892
7893 return tmp;
7894 }
7895
7896 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
7897 of this code is to classify each 8bytes of incoming argument by the register
7898 class and assign registers accordingly. */
7899
7900 /* Return the union class of CLASS1 and CLASS2.
7901 See the x86-64 PS ABI for details. */
7902
7903 static enum x86_64_reg_class
7904 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7905 {
7906 /* Rule #1: If both classes are equal, this is the resulting class. */
7907 if (class1 == class2)
7908 return class1;
7909
7910 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7911 the other class. */
7912 if (class1 == X86_64_NO_CLASS)
7913 return class2;
7914 if (class2 == X86_64_NO_CLASS)
7915 return class1;
7916
7917 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7918 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7919 return X86_64_MEMORY_CLASS;
7920
7921 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7922 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7923 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7924 return X86_64_INTEGERSI_CLASS;
7925 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7926 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7927 return X86_64_INTEGER_CLASS;
7928
7929 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7930 MEMORY is used. */
7931 if (class1 == X86_64_X87_CLASS
7932 || class1 == X86_64_X87UP_CLASS
7933 || class1 == X86_64_COMPLEX_X87_CLASS
7934 || class2 == X86_64_X87_CLASS
7935 || class2 == X86_64_X87UP_CLASS
7936 || class2 == X86_64_COMPLEX_X87_CLASS)
7937 return X86_64_MEMORY_CLASS;
7938
7939 /* Rule #6: Otherwise class SSE is used. */
7940 return X86_64_SSE_CLASS;
7941 }
7942
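/* Illustrative only, not part of GCC: a worked example of the merge rules
   above.  For

     struct s { double d; int i; };

   the first eightbyte classifies as SSE (the double) and the second as
   INTEGER (the int), so under the SysV x86-64 ABI the struct is passed in
   one SSE register and one integer register.  */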
7943 /* Classify the argument of type TYPE and mode MODE.
7944 CLASSES will be filled by the register class used to pass each word
7945 of the operand. The number of words is returned. In case the parameter
7946 should be passed in memory, 0 is returned. As a special case for zero
7947 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7948
7949    BIT_OFFSET is used internally for handling records and specifies the
7950    offset in bits modulo 512, to avoid overflow cases.
7951
7952 See the x86-64 PS ABI for details.
7953 */
7954
7955 static int
7956 classify_argument (machine_mode mode, const_tree type,
7957 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7958 {
7959 HOST_WIDE_INT bytes =
7960 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7961 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7962
7963 /* Variable sized entities are always passed/returned in memory. */
7964 if (bytes < 0)
7965 return 0;
7966
7967 if (mode != VOIDmode
7968 && targetm.calls.must_pass_in_stack (mode, type))
7969 return 0;
7970
7971 if (type && AGGREGATE_TYPE_P (type))
7972 {
7973 int i;
7974 tree field;
7975 enum x86_64_reg_class subclasses[MAX_CLASSES];
7976
7977 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7978 if (bytes > 64)
7979 return 0;
7980
7981 for (i = 0; i < words; i++)
7982 classes[i] = X86_64_NO_CLASS;
7983
7984       /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
7985 	 signal the memory class, so handle it as a special case.  */
7986 if (!words)
7987 {
7988 classes[0] = X86_64_NO_CLASS;
7989 return 1;
7990 }
7991
7992 /* Classify each field of record and merge classes. */
7993 switch (TREE_CODE (type))
7994 {
7995 case RECORD_TYPE:
7996 /* And now merge the fields of structure. */
7997 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7998 {
7999 if (TREE_CODE (field) == FIELD_DECL)
8000 {
8001 int num;
8002
8003 if (TREE_TYPE (field) == error_mark_node)
8004 continue;
8005
8006 /* Bitfields are always classified as integer. Handle them
8007 early, since later code would consider them to be
8008 misaligned integers. */
8009 if (DECL_BIT_FIELD (field))
8010 {
8011 for (i = (int_bit_position (field)
8012 + (bit_offset % 64)) / 8 / 8;
8013 i < ((int_bit_position (field) + (bit_offset % 64))
8014 + tree_to_shwi (DECL_SIZE (field))
8015 + 63) / 8 / 8; i++)
8016 classes[i] =
8017 merge_classes (X86_64_INTEGER_CLASS,
8018 classes[i]);
8019 }
8020 else
8021 {
8022 int pos;
8023
8024 type = TREE_TYPE (field);
8025
8026 /* Flexible array member is ignored. */
8027 if (TYPE_MODE (type) == BLKmode
8028 && TREE_CODE (type) == ARRAY_TYPE
8029 && TYPE_SIZE (type) == NULL_TREE
8030 && TYPE_DOMAIN (type) != NULL_TREE
8031 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
8032 == NULL_TREE))
8033 {
8034 static bool warned;
8035
8036 if (!warned && warn_psabi)
8037 {
8038 warned = true;
8039 inform (input_location,
8040 "the ABI of passing struct with"
8041 " a flexible array member has"
8042 " changed in GCC 4.4");
8043 }
8044 continue;
8045 }
8046 num = classify_argument (TYPE_MODE (type), type,
8047 subclasses,
8048 (int_bit_position (field)
8049 + bit_offset) % 512);
8050 if (!num)
8051 return 0;
8052 pos = (int_bit_position (field)
8053 + (bit_offset % 64)) / 8 / 8;
8054 for (i = 0; i < num && (i + pos) < words; i++)
8055 classes[i + pos] =
8056 merge_classes (subclasses[i], classes[i + pos]);
8057 }
8058 }
8059 }
8060 break;
8061
8062 case ARRAY_TYPE:
8063 /* Arrays are handled as small records. */
8064 {
8065 int num;
8066 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
8067 TREE_TYPE (type), subclasses, bit_offset);
8068 if (!num)
8069 return 0;
8070
8071 /* The partial classes are now full classes. */
8072 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
8073 subclasses[0] = X86_64_SSE_CLASS;
8074 if (subclasses[0] == X86_64_INTEGERSI_CLASS
8075 && !((bit_offset % 64) == 0 && bytes == 4))
8076 subclasses[0] = X86_64_INTEGER_CLASS;
8077
8078 for (i = 0; i < words; i++)
8079 classes[i] = subclasses[i % num];
8080
8081 break;
8082 }
8083 case UNION_TYPE:
8084 case QUAL_UNION_TYPE:
8085 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
8087 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8088 {
8089 if (TREE_CODE (field) == FIELD_DECL)
8090 {
8091 int num;
8092
8093 if (TREE_TYPE (field) == error_mark_node)
8094 continue;
8095
8096 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
8097 TREE_TYPE (field), subclasses,
8098 bit_offset);
8099 if (!num)
8100 return 0;
8101 for (i = 0; i < num && i < words; i++)
8102 classes[i] = merge_classes (subclasses[i], classes[i]);
8103 }
8104 }
8105 break;
8106
8107 default:
8108 gcc_unreachable ();
8109 }
8110
8111 if (words > 2)
8112 {
8113 /* When size > 16 bytes, if the first eightbyte isn't
8114 X86_64_SSE_CLASS or any of the others isn't
8115 X86_64_SSEUP_CLASS, everything should be passed in
8116 memory. */
8117 if (classes[0] != X86_64_SSE_CLASS)
8118 return 0;
8119
8120 for (i = 1; i < words; i++)
8121 if (classes[i] != X86_64_SSEUP_CLASS)
8122 return 0;
8123 }
8124
8125 /* Final merger cleanup. */
8126 for (i = 0; i < words; i++)
8127 {
8128 /* If one class is MEMORY, everything should be passed in
8129 memory. */
8130 if (classes[i] == X86_64_MEMORY_CLASS)
8131 return 0;
8132
8133 /* The X86_64_SSEUP_CLASS should be always preceded by
8134 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
8135 if (classes[i] == X86_64_SSEUP_CLASS
8136 && classes[i - 1] != X86_64_SSE_CLASS
8137 && classes[i - 1] != X86_64_SSEUP_CLASS)
8138 {
8139 /* The first one should never be X86_64_SSEUP_CLASS. */
8140 gcc_assert (i != 0);
8141 classes[i] = X86_64_SSE_CLASS;
8142 }
8143
8144 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
8145 everything should be passed in memory. */
8146 if (classes[i] == X86_64_X87UP_CLASS
8147 && (classes[i - 1] != X86_64_X87_CLASS))
8148 {
8149 static bool warned;
8150
8151 /* The first one should never be X86_64_X87UP_CLASS. */
8152 gcc_assert (i != 0);
8153 if (!warned && warn_psabi)
8154 {
8155 warned = true;
8156 inform (input_location,
8157 "the ABI of passing union with long double"
8158 " has changed in GCC 4.4");
8159 }
8160 return 0;
8161 }
8162 }
8163 return words;
8164 }
8165
8166 /* Compute alignment needed. We align all types to natural boundaries with
8167 exception of XFmode that is aligned to 64bits. */
8168 if (mode != VOIDmode && mode != BLKmode)
8169 {
8170 int mode_alignment = GET_MODE_BITSIZE (mode);
8171
8172 if (mode == XFmode)
8173 mode_alignment = 128;
8174 else if (mode == XCmode)
8175 mode_alignment = 256;
8176 if (COMPLEX_MODE_P (mode))
8177 mode_alignment /= 2;
8178 /* Misaligned fields are always returned in memory. */
8179 if (bit_offset % mode_alignment)
8180 return 0;
8181 }
8182
8183 /* For V1xx modes, just use the base mode. */
8184 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
8185 && GET_MODE_UNIT_SIZE (mode) == bytes)
8186 mode = GET_MODE_INNER (mode);
8187
8188 /* Classification of atomic types. */
8189 switch (mode)
8190 {
8191 case SDmode:
8192 case DDmode:
8193 classes[0] = X86_64_SSE_CLASS;
8194 return 1;
8195 case TDmode:
8196 classes[0] = X86_64_SSE_CLASS;
8197 classes[1] = X86_64_SSEUP_CLASS;
8198 return 2;
8199 case DImode:
8200 case SImode:
8201 case HImode:
8202 case QImode:
8203 case CSImode:
8204 case CHImode:
8205 case CQImode:
8206 {
8207 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
8208
8209 /* Analyze last 128 bits only. */
8210 size = (size - 1) & 0x7f;
8211
8212 if (size < 32)
8213 {
8214 classes[0] = X86_64_INTEGERSI_CLASS;
8215 return 1;
8216 }
8217 else if (size < 64)
8218 {
8219 classes[0] = X86_64_INTEGER_CLASS;
8220 return 1;
8221 }
8222 else if (size < 64+32)
8223 {
8224 classes[0] = X86_64_INTEGER_CLASS;
8225 classes[1] = X86_64_INTEGERSI_CLASS;
8226 return 2;
8227 }
8228 else if (size < 64+64)
8229 {
8230 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8231 return 2;
8232 }
8233 else
8234 gcc_unreachable ();
8235 }
8236 case CDImode:
8237 case TImode:
8238 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8239 return 2;
8240 case COImode:
8241 case OImode:
8242 /* OImode shouldn't be used directly. */
8243 gcc_unreachable ();
8244 case CTImode:
8245 return 0;
8246 case SFmode:
8247 if (!(bit_offset % 64))
8248 classes[0] = X86_64_SSESF_CLASS;
8249 else
8250 classes[0] = X86_64_SSE_CLASS;
8251 return 1;
8252 case DFmode:
8253 classes[0] = X86_64_SSEDF_CLASS;
8254 return 1;
8255 case XFmode:
8256 classes[0] = X86_64_X87_CLASS;
8257 classes[1] = X86_64_X87UP_CLASS;
8258 return 2;
8259 case TFmode:
8260 classes[0] = X86_64_SSE_CLASS;
8261 classes[1] = X86_64_SSEUP_CLASS;
8262 return 2;
8263 case SCmode:
8264 classes[0] = X86_64_SSE_CLASS;
8265 if (!(bit_offset % 64))
8266 return 1;
8267 else
8268 {
8269 static bool warned;
8270
8271 if (!warned && warn_psabi)
8272 {
8273 warned = true;
8274 inform (input_location,
8275 "the ABI of passing structure with complex float"
8276 " member has changed in GCC 4.4");
8277 }
8278 classes[1] = X86_64_SSESF_CLASS;
8279 return 2;
8280 }
8281 case DCmode:
8282 classes[0] = X86_64_SSEDF_CLASS;
8283 classes[1] = X86_64_SSEDF_CLASS;
8284 return 2;
8285 case XCmode:
8286 classes[0] = X86_64_COMPLEX_X87_CLASS;
8287 return 1;
8288 case TCmode:
8289 /* This mode is larger than 16 bytes. */
8290 return 0;
8291 case V8SFmode:
8292 case V8SImode:
8293 case V32QImode:
8294 case V16HImode:
8295 case V4DFmode:
8296 case V4DImode:
8297 classes[0] = X86_64_SSE_CLASS;
8298 classes[1] = X86_64_SSEUP_CLASS;
8299 classes[2] = X86_64_SSEUP_CLASS;
8300 classes[3] = X86_64_SSEUP_CLASS;
8301 return 4;
8302 case V8DFmode:
8303 case V16SFmode:
8304 case V8DImode:
8305 case V16SImode:
8306 case V32HImode:
8307 case V64QImode:
8308 classes[0] = X86_64_SSE_CLASS;
8309 classes[1] = X86_64_SSEUP_CLASS;
8310 classes[2] = X86_64_SSEUP_CLASS;
8311 classes[3] = X86_64_SSEUP_CLASS;
8312 classes[4] = X86_64_SSEUP_CLASS;
8313 classes[5] = X86_64_SSEUP_CLASS;
8314 classes[6] = X86_64_SSEUP_CLASS;
8315 classes[7] = X86_64_SSEUP_CLASS;
8316 return 8;
8317 case V4SFmode:
8318 case V4SImode:
8319 case V16QImode:
8320 case V8HImode:
8321 case V2DFmode:
8322 case V2DImode:
8323 classes[0] = X86_64_SSE_CLASS;
8324 classes[1] = X86_64_SSEUP_CLASS;
8325 return 2;
8326 case V1TImode:
8327 case V1DImode:
8328 case V2SFmode:
8329 case V2SImode:
8330 case V4HImode:
8331 case V8QImode:
8332 classes[0] = X86_64_SSE_CLASS;
8333 return 1;
8334 case BLKmode:
8335 case VOIDmode:
8336 return 0;
8337 default:
8338 gcc_assert (VECTOR_MODE_P (mode));
8339
8340 if (bytes > 16)
8341 return 0;
8342
8343 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8344
8345 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8346 classes[0] = X86_64_INTEGERSI_CLASS;
8347 else
8348 classes[0] = X86_64_INTEGER_CLASS;
8349 classes[1] = X86_64_INTEGER_CLASS;
8350 return 1 + (bytes > 8);
8351 }
8352 }
8353
8354 /* Examine the argument and set the number of registers required in each
8355 class. Return true iff the parameter should be passed in memory. */
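
/* For instance, for struct s { double d; long l; } this sets *sse_nregs = 1
   and *int_nregs = 1 and returns false, while an aggregate classified as
   memory (classify_argument returning 0) makes it return true.  */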
8356
8357 static bool
8358 examine_argument (machine_mode mode, const_tree type, int in_return,
8359 int *int_nregs, int *sse_nregs)
8360 {
8361 enum x86_64_reg_class regclass[MAX_CLASSES];
8362 int n = classify_argument (mode, type, regclass, 0);
8363
8364 *int_nregs = 0;
8365 *sse_nregs = 0;
8366
8367 if (!n)
8368 return true;
8369 for (n--; n >= 0; n--)
8370 switch (regclass[n])
8371 {
8372 case X86_64_INTEGER_CLASS:
8373 case X86_64_INTEGERSI_CLASS:
8374 (*int_nregs)++;
8375 break;
8376 case X86_64_SSE_CLASS:
8377 case X86_64_SSESF_CLASS:
8378 case X86_64_SSEDF_CLASS:
8379 (*sse_nregs)++;
8380 break;
8381 case X86_64_NO_CLASS:
8382 case X86_64_SSEUP_CLASS:
8383 break;
8384 case X86_64_X87_CLASS:
8385 case X86_64_X87UP_CLASS:
8386 case X86_64_COMPLEX_X87_CLASS:
8387 if (!in_return)
8388 return true;
8389 break;
8390 case X86_64_MEMORY_CLASS:
8391 gcc_unreachable ();
8392 }
8393
8394 return false;
8395 }
8396
8397 /* Construct container for the argument used by GCC interface. See
8398 FUNCTION_ARG for the detailed description. */
8399
8400 static rtx
8401 construct_container (machine_mode mode, machine_mode orig_mode,
8402 const_tree type, int in_return, int nintregs, int nsseregs,
8403 const int *intreg, int sse_regno)
8404 {
8405 /* The following variables hold the static issued_error state. */
8406 static bool issued_sse_arg_error;
8407 static bool issued_sse_ret_error;
8408 static bool issued_x87_ret_error;
8409
8410 machine_mode tmpmode;
8411 int bytes =
8412 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8413 enum x86_64_reg_class regclass[MAX_CLASSES];
8414 int n;
8415 int i;
8416 int nexps = 0;
8417 int needed_sseregs, needed_intregs;
8418 rtx exp[MAX_CLASSES];
8419 rtx ret;
8420
8421 n = classify_argument (mode, type, regclass, 0);
8422 if (!n)
8423 return NULL;
8424 if (examine_argument (mode, type, in_return, &needed_intregs,
8425 &needed_sseregs))
8426 return NULL;
8427 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8428 return NULL;
8429
8430 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8431 some less clueful developer tries to use floating-point anyway. */
8432 if (needed_sseregs && !TARGET_SSE)
8433 {
8434 if (in_return)
8435 {
8436 if (!issued_sse_ret_error)
8437 {
8438 error ("SSE register return with SSE disabled");
8439 issued_sse_ret_error = true;
8440 }
8441 }
8442 else if (!issued_sse_arg_error)
8443 {
8444 error ("SSE register argument with SSE disabled");
8445 issued_sse_arg_error = true;
8446 }
8447 return NULL;
8448 }
8449
8450 /* Likewise, error if the ABI requires us to return values in the
8451 x87 registers and the user specified -mno-80387. */
8452 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8453 for (i = 0; i < n; i++)
8454 if (regclass[i] == X86_64_X87_CLASS
8455 || regclass[i] == X86_64_X87UP_CLASS
8456 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8457 {
8458 if (!issued_x87_ret_error)
8459 {
8460 error ("x87 register return with x87 disabled");
8461 issued_x87_ret_error = true;
8462 }
8463 return NULL;
8464 }
8465
8466 /* First construct simple cases. Avoid SCmode, since we want to use
8467 single register to pass this type. */
8468 if (n == 1 && mode != SCmode)
8469 switch (regclass[0])
8470 {
8471 case X86_64_INTEGER_CLASS:
8472 case X86_64_INTEGERSI_CLASS:
8473 return gen_rtx_REG (mode, intreg[0]);
8474 case X86_64_SSE_CLASS:
8475 case X86_64_SSESF_CLASS:
8476 case X86_64_SSEDF_CLASS:
8477 if (mode != BLKmode)
8478 return gen_reg_or_parallel (mode, orig_mode,
8479 SSE_REGNO (sse_regno));
8480 break;
8481 case X86_64_X87_CLASS:
8482 case X86_64_COMPLEX_X87_CLASS:
8483 return gen_rtx_REG (mode, FIRST_STACK_REG);
8484 case X86_64_NO_CLASS:
8485 /* Zero sized array, struct or class. */
8486 return NULL;
8487 default:
8488 gcc_unreachable ();
8489 }
8490 if (n == 2
8491 && regclass[0] == X86_64_SSE_CLASS
8492 && regclass[1] == X86_64_SSEUP_CLASS
8493 && mode != BLKmode)
8494 return gen_reg_or_parallel (mode, orig_mode,
8495 SSE_REGNO (sse_regno));
8496 if (n == 4
8497 && regclass[0] == X86_64_SSE_CLASS
8498 && regclass[1] == X86_64_SSEUP_CLASS
8499 && regclass[2] == X86_64_SSEUP_CLASS
8500 && regclass[3] == X86_64_SSEUP_CLASS
8501 && mode != BLKmode)
8502 return gen_reg_or_parallel (mode, orig_mode,
8503 SSE_REGNO (sse_regno));
8504 if (n == 8
8505 && regclass[0] == X86_64_SSE_CLASS
8506 && regclass[1] == X86_64_SSEUP_CLASS
8507 && regclass[2] == X86_64_SSEUP_CLASS
8508 && regclass[3] == X86_64_SSEUP_CLASS
8509 && regclass[4] == X86_64_SSEUP_CLASS
8510 && regclass[5] == X86_64_SSEUP_CLASS
8511 && regclass[6] == X86_64_SSEUP_CLASS
8512 && regclass[7] == X86_64_SSEUP_CLASS
8513 && mode != BLKmode)
8514 return gen_reg_or_parallel (mode, orig_mode,
8515 SSE_REGNO (sse_regno));
8516 if (n == 2
8517 && regclass[0] == X86_64_X87_CLASS
8518 && regclass[1] == X86_64_X87UP_CLASS)
8519 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8520
8521 if (n == 2
8522 && regclass[0] == X86_64_INTEGER_CLASS
8523 && regclass[1] == X86_64_INTEGER_CLASS
8524 && (mode == CDImode || mode == TImode)
8525 && intreg[0] + 1 == intreg[1])
8526 return gen_rtx_REG (mode, intreg[0]);
8527
8528 /* Otherwise figure out the entries of the PARALLEL. */
8529 for (i = 0; i < n; i++)
8530 {
8531 int pos;
8532
8533 switch (regclass[i])
8534 {
8535 case X86_64_NO_CLASS:
8536 break;
8537 case X86_64_INTEGER_CLASS:
8538 case X86_64_INTEGERSI_CLASS:
8539 /* Merge TImodes on aligned occasions here too. */
8540 if (i * 8 + 8 > bytes)
8541 tmpmode
8542 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
8543 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8544 tmpmode = SImode;
8545 else
8546 tmpmode = DImode;
8547 /* We've requested 24 bytes for which
8548 we don't have a mode. Use DImode. */
8549 if (tmpmode == BLKmode)
8550 tmpmode = DImode;
8551 exp [nexps++]
8552 = gen_rtx_EXPR_LIST (VOIDmode,
8553 gen_rtx_REG (tmpmode, *intreg),
8554 GEN_INT (i*8));
8555 intreg++;
8556 break;
8557 case X86_64_SSESF_CLASS:
8558 exp [nexps++]
8559 = gen_rtx_EXPR_LIST (VOIDmode,
8560 gen_rtx_REG (SFmode,
8561 SSE_REGNO (sse_regno)),
8562 GEN_INT (i*8));
8563 sse_regno++;
8564 break;
8565 case X86_64_SSEDF_CLASS:
8566 exp [nexps++]
8567 = gen_rtx_EXPR_LIST (VOIDmode,
8568 gen_rtx_REG (DFmode,
8569 SSE_REGNO (sse_regno)),
8570 GEN_INT (i*8));
8571 sse_regno++;
8572 break;
8573 case X86_64_SSE_CLASS:
8574 pos = i;
8575 switch (n)
8576 {
8577 case 1:
8578 tmpmode = DImode;
8579 break;
8580 case 2:
8581 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8582 {
8583 tmpmode = TImode;
8584 i++;
8585 }
8586 else
8587 tmpmode = DImode;
8588 break;
8589 case 4:
8590 gcc_assert (i == 0
8591 && regclass[1] == X86_64_SSEUP_CLASS
8592 && regclass[2] == X86_64_SSEUP_CLASS
8593 && regclass[3] == X86_64_SSEUP_CLASS);
8594 tmpmode = OImode;
8595 i += 3;
8596 break;
8597 case 8:
8598 gcc_assert (i == 0
8599 && regclass[1] == X86_64_SSEUP_CLASS
8600 && regclass[2] == X86_64_SSEUP_CLASS
8601 && regclass[3] == X86_64_SSEUP_CLASS
8602 && regclass[4] == X86_64_SSEUP_CLASS
8603 && regclass[5] == X86_64_SSEUP_CLASS
8604 && regclass[6] == X86_64_SSEUP_CLASS
8605 && regclass[7] == X86_64_SSEUP_CLASS);
8606 tmpmode = XImode;
8607 i += 7;
8608 break;
8609 default:
8610 gcc_unreachable ();
8611 }
8612 exp [nexps++]
8613 = gen_rtx_EXPR_LIST (VOIDmode,
8614 gen_rtx_REG (tmpmode,
8615 SSE_REGNO (sse_regno)),
8616 GEN_INT (pos*8));
8617 sse_regno++;
8618 break;
8619 default:
8620 gcc_unreachable ();
8621 }
8622 }
8623
8624 /* Empty aligned struct, union or class. */
8625 if (nexps == 0)
8626 return NULL;
8627
8628 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8629 for (i = 0; i < nexps; i++)
8630 XVECEXP (ret, 0, i) = exp [i];
8631 return ret;
8632 }
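
/* Informally, for an argument such as struct { double d; long l; } whose
   first free registers are %xmm0 and %rdi, the PARALLEL built above looks
   roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		(expr_list (reg:DI di) (const_int 8))])

   i.e. it records which piece of the value lives in which register at
   which byte offset.  */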
8633
8634 /* Update the data in CUM to advance over an argument of mode MODE
8635 and data type TYPE. (TYPE is null for libcalls where that information
8636 may not be available.)
8637
8638 Return the number of integer registers advanced over. */
8639
8640 static int
8641 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8642 const_tree type, HOST_WIDE_INT bytes,
8643 HOST_WIDE_INT words)
8644 {
8645 int res = 0;
8646 bool error_p = false;
8647
8648 if (TARGET_IAMCU)
8649 {
8650 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8651 bytes in registers. */
8652 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8653 goto pass_in_reg;
8654 return res;
8655 }
8656
8657 switch (mode)
8658 {
8659 default:
8660 break;
8661
8662 case BLKmode:
8663 if (bytes < 0)
8664 break;
8665 /* FALLTHRU */
8666
8667 case DImode:
8668 case SImode:
8669 case HImode:
8670 case QImode:
8671 pass_in_reg:
8672 cum->words += words;
8673 cum->nregs -= words;
8674 cum->regno += words;
8675 if (cum->nregs >= 0)
8676 res = words;
8677 if (cum->nregs <= 0)
8678 {
8679 cum->nregs = 0;
8680 cfun->machine->arg_reg_available = false;
8681 cum->regno = 0;
8682 }
8683 break;
8684
8685 case OImode:
8686 /* OImode shouldn't be used directly. */
8687 gcc_unreachable ();
8688
8689 case DFmode:
8690 if (cum->float_in_sse == -1)
8691 error_p = 1;
8692 if (cum->float_in_sse < 2)
8693 break;
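/* FALLTHRU */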
8694 case SFmode:
8695 if (cum->float_in_sse == -1)
8696 error_p = 1;
8697 if (cum->float_in_sse < 1)
8698 break;
8699 /* FALLTHRU */
8700
8701 case V8SFmode:
8702 case V8SImode:
8703 case V64QImode:
8704 case V32HImode:
8705 case V16SImode:
8706 case V8DImode:
8707 case V16SFmode:
8708 case V8DFmode:
8709 case V32QImode:
8710 case V16HImode:
8711 case V4DFmode:
8712 case V4DImode:
8713 case TImode:
8714 case V16QImode:
8715 case V8HImode:
8716 case V4SImode:
8717 case V2DImode:
8718 case V4SFmode:
8719 case V2DFmode:
8720 if (!type || !AGGREGATE_TYPE_P (type))
8721 {
8722 cum->sse_words += words;
8723 cum->sse_nregs -= 1;
8724 cum->sse_regno += 1;
8725 if (cum->sse_nregs <= 0)
8726 {
8727 cum->sse_nregs = 0;
8728 cum->sse_regno = 0;
8729 }
8730 }
8731 break;
8732
8733 case V8QImode:
8734 case V4HImode:
8735 case V2SImode:
8736 case V2SFmode:
8737 case V1TImode:
8738 case V1DImode:
8739 if (!type || !AGGREGATE_TYPE_P (type))
8740 {
8741 cum->mmx_words += words;
8742 cum->mmx_nregs -= 1;
8743 cum->mmx_regno += 1;
8744 if (cum->mmx_nregs <= 0)
8745 {
8746 cum->mmx_nregs = 0;
8747 cum->mmx_regno = 0;
8748 }
8749 }
8750 break;
8751 }
8752 if (error_p)
8753 {
8754 cum->float_in_sse = 0;
8755 error ("calling %qD with SSE calling convention without "
8756 "SSE/SSE2 enabled", cum->decl);
8757 sorry ("this is a GCC bug that can be worked around by adding "
8758 "attribute used to function called");
8759 }
8760
8761 return res;
8762 }
8763
8764 static int
8765 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8766 const_tree type, HOST_WIDE_INT words, bool named)
8767 {
8768 int int_nregs, sse_nregs;
8769
8770 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8771 if (!named && (VALID_AVX512F_REG_MODE (mode)
8772 || VALID_AVX256_REG_MODE (mode)))
8773 return 0;
8774
8775 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8776 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8777 {
8778 cum->nregs -= int_nregs;
8779 cum->sse_nregs -= sse_nregs;
8780 cum->regno += int_nregs;
8781 cum->sse_regno += sse_nregs;
8782 return int_nregs;
8783 }
8784 else
8785 {
8786 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8787 cum->words = ROUND_UP (cum->words, align);
8788 cum->words += words;
8789 return 0;
8790 }
8791 }
8792
8793 static int
8794 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8795 HOST_WIDE_INT words)
8796 {
8797 /* Otherwise, this should be passed indirectly. */
8798 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8799
8800 cum->words += words;
8801 if (cum->nregs > 0)
8802 {
8803 cum->nregs -= 1;
8804 cum->regno += 1;
8805 return 1;
8806 }
8807 return 0;
8808 }
8809
8810 /* Update the data in CUM to advance over an argument of mode MODE and
8811 data type TYPE. (TYPE is null for libcalls where that information
8812 may not be available.) */
8813
8814 static void
8815 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8816 const_tree type, bool named)
8817 {
8818 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8819 HOST_WIDE_INT bytes, words;
8820 int nregs;
8821
8822 if (mode == BLKmode)
8823 bytes = int_size_in_bytes (type);
8824 else
8825 bytes = GET_MODE_SIZE (mode);
8826 words = CEIL (bytes, UNITS_PER_WORD);
8827
8828 if (type)
8829 mode = type_natural_mode (type, NULL, false);
8830
8831 if ((type && POINTER_BOUNDS_TYPE_P (type))
8832 || POINTER_BOUNDS_MODE_P (mode))
8833 {
8834 /* If we pass bounds in BT then just update the remaining bounds count. */
8835 if (cum->bnds_in_bt)
8836 {
8837 cum->bnds_in_bt--;
8838 return;
8839 }
8840
8841 /* Update the remaining number of bounds to force. */
8842 if (cum->force_bnd_pass)
8843 cum->force_bnd_pass--;
8844
8845 cum->bnd_regno++;
8846
8847 return;
8848 }
8849
8850 /* The first arg not going to Bounds Tables resets this counter. */
8851 cum->bnds_in_bt = 0;
8852 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
8853 the passed and received types do not match. If bounds do not follow an
8854 unnamed arg, still pretend the required number of bounds were passed. */
8855 if (cum->force_bnd_pass)
8856 {
8857 cum->bnd_regno += cum->force_bnd_pass;
8858 cum->force_bnd_pass = 0;
8859 }
8860
8861 if (TARGET_64BIT)
8862 {
8863 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8864
8865 if (call_abi == MS_ABI)
8866 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8867 else
8868 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8869 }
8870 else
8871 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8872
8873 /* For stdarg we expect bounds to be passed for each value passed
8874 in register. */
8875 if (cum->stdarg)
8876 cum->force_bnd_pass = nregs;
8877 /* For pointers passed in memory we expect bounds passed in Bounds
8878 Table. */
8879 if (!nregs)
8880 cum->bnds_in_bt = chkp_type_bounds_count (type);
8881 }
8882
8883 /* Define where to put the arguments to a function.
8884 Value is zero to push the argument on the stack,
8885 or a hard register in which to store the argument.
8886
8887 MODE is the argument's machine mode.
8888 TYPE is the data type of the argument (as a tree).
8889 This is null for libcalls where that information may
8890 not be available.
8891 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8892 the preceding args and about the function being called.
8893 NAMED is nonzero if this argument is a named parameter
8894 (otherwise it is an extra parameter matching an ellipsis). */
8895
8896 static rtx
8897 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8898 machine_mode orig_mode, const_tree type,
8899 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8900 {
8901 bool error_p = false;
8902 /* Avoid the AL settings for the Unix64 ABI. */
8903 if (mode == VOIDmode)
8904 return constm1_rtx;
8905
8906 if (TARGET_IAMCU)
8907 {
8908 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8909 bytes in registers. */
8910 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8911 goto pass_in_reg;
8912 return NULL_RTX;
8913 }
8914
8915 switch (mode)
8916 {
8917 default:
8918 break;
8919
8920 case BLKmode:
8921 if (bytes < 0)
8922 break;
8923 /* FALLTHRU */
8924 case DImode:
8925 case SImode:
8926 case HImode:
8927 case QImode:
8928 pass_in_reg:
8929 if (words <= cum->nregs)
8930 {
8931 int regno = cum->regno;
8932
8933 /* Fastcall allocates the first two DWORD (SImode) or
8934 smaller arguments to ECX and EDX if the argument
8935 isn't an aggregate type. */
8936 if (cum->fastcall)
8937 {
8938 if (mode == BLKmode
8939 || mode == DImode
8940 || (type && AGGREGATE_TYPE_P (type)))
8941 break;
8942
8943 /* ECX, not EAX, is the first allocated register. */
8944 if (regno == AX_REG)
8945 regno = CX_REG;
8946 }
8947 return gen_rtx_REG (mode, regno);
8948 }
8949 break;
8950
8951 case DFmode:
8952 if (cum->float_in_sse == -1)
8953 error_p = 1;
8954 if (cum->float_in_sse < 2)
8955 break;
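/* FALLTHRU */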
8956 case SFmode:
8957 if (cum->float_in_sse == -1)
8958 error_p = 1;
8959 if (cum->float_in_sse < 1)
8960 break;
8961 /* FALLTHRU */
8962 case TImode:
8963 /* In 32bit, we pass TImode in xmm registers. */
8964 case V16QImode:
8965 case V8HImode:
8966 case V4SImode:
8967 case V2DImode:
8968 case V4SFmode:
8969 case V2DFmode:
8970 if (!type || !AGGREGATE_TYPE_P (type))
8971 {
8972 if (cum->sse_nregs)
8973 return gen_reg_or_parallel (mode, orig_mode,
8974 cum->sse_regno + FIRST_SSE_REG);
8975 }
8976 break;
8977
8978 case OImode:
8979 case XImode:
8980 /* OImode and XImode shouldn't be used directly. */
8981 gcc_unreachable ();
8982
8983 case V64QImode:
8984 case V32HImode:
8985 case V16SImode:
8986 case V8DImode:
8987 case V16SFmode:
8988 case V8DFmode:
8989 case V8SFmode:
8990 case V8SImode:
8991 case V32QImode:
8992 case V16HImode:
8993 case V4DFmode:
8994 case V4DImode:
8995 if (!type || !AGGREGATE_TYPE_P (type))
8996 {
8997 if (cum->sse_nregs)
8998 return gen_reg_or_parallel (mode, orig_mode,
8999 cum->sse_regno + FIRST_SSE_REG);
9000 }
9001 break;
9002
9003 case V8QImode:
9004 case V4HImode:
9005 case V2SImode:
9006 case V2SFmode:
9007 case V1TImode:
9008 case V1DImode:
9009 if (!type || !AGGREGATE_TYPE_P (type))
9010 {
9011 if (cum->mmx_nregs)
9012 return gen_reg_or_parallel (mode, orig_mode,
9013 cum->mmx_regno + FIRST_MMX_REG);
9014 }
9015 break;
9016 }
9017 if (error_p)
9018 {
9019 cum->float_in_sse = 0;
9020 error ("calling %qD with SSE calling convention without "
9021 "SSE/SSE2 enabled", cum->decl);
9022 sorry ("this is a GCC bug that can be worked around by adding "
9023 "attribute used to function called");
9024 }
9025
9026 return NULL_RTX;
9027 }
9028
9029 static rtx
9030 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9031 machine_mode orig_mode, const_tree type, bool named)
9032 {
9033 /* Handle a hidden AL argument containing number of registers
9034 for varargs x86-64 functions. */
9035 if (mode == VOIDmode)
9036 return GEN_INT (cum->maybe_vaarg
9037 ? (cum->sse_nregs < 0
9038 ? X86_64_SSE_REGPARM_MAX
9039 : cum->sse_regno)
9040 : -1);
9041
9042 switch (mode)
9043 {
9044 default:
9045 break;
9046
9047 case V8SFmode:
9048 case V8SImode:
9049 case V32QImode:
9050 case V16HImode:
9051 case V4DFmode:
9052 case V4DImode:
9053 case V16SFmode:
9054 case V16SImode:
9055 case V64QImode:
9056 case V32HImode:
9057 case V8DFmode:
9058 case V8DImode:
9059 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9060 if (!named)
9061 return NULL;
9062 break;
9063 }
9064
9065 return construct_container (mode, orig_mode, type, 0, cum->nregs,
9066 cum->sse_nregs,
9067 &x86_64_int_parameter_registers [cum->regno],
9068 cum->sse_regno);
9069 }
9070
9071 static rtx
9072 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9073 machine_mode orig_mode, bool named,
9074 HOST_WIDE_INT bytes)
9075 {
9076 unsigned int regno;
9077
9078 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
9079 We use the value -2 to specify that the current function call is MS_ABI. */
9080 if (mode == VOIDmode)
9081 return GEN_INT (-2);
9082
9083 /* If we've run out of registers, it goes on the stack. */
9084 if (cum->nregs == 0)
9085 return NULL_RTX;
9086
9087 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
9088
9089 /* Only floating point modes are passed in anything but integer regs. */
9090 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
9091 {
9092 if (named)
9093 regno = cum->regno + FIRST_SSE_REG;
9094 else
9095 {
9096 rtx t1, t2;
9097
9098 /* Unnamed floating parameters are passed in both the
9099 SSE and integer registers. */
9100 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
9101 t2 = gen_rtx_REG (mode, regno);
9102 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
9103 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
9104 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
9105 }
9106 }
9107 /* Handle aggregate types passed in registers. */
9108 if (orig_mode == BLKmode)
9109 {
9110 if (bytes > 0 && bytes <= 8)
9111 mode = (bytes > 4 ? DImode : SImode);
9112 if (mode == BLKmode)
9113 mode = DImode;
9114 }
9115
9116 return gen_reg_or_parallel (mode, orig_mode, regno);
9117 }
9118
9119 /* Return where to put the arguments to a function.
9120 Return zero to push the argument on the stack, or a hard register in which to store the argument.
9121
9122 MODE is the argument's machine mode. TYPE is the data type of the
9123 argument. It is null for libcalls where that information may not be
9124 available. CUM gives information about the preceding args and about
9125 the function being called. NAMED is nonzero if this argument is a
9126 named parameter (otherwise it is an extra parameter matching an
9127 ellipsis). */
9128
9129 static rtx
9130 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
9131 const_tree type, bool named)
9132 {
9133 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9134 machine_mode mode = omode;
9135 HOST_WIDE_INT bytes, words;
9136 rtx arg;
9137
9138 /* All pointer bounds arguments are handled separately here. */
9139 if ((type && POINTER_BOUNDS_TYPE_P (type))
9140 || POINTER_BOUNDS_MODE_P (mode))
9141 {
9142 /* Return NULL if bounds are forced to go in Bounds Table. */
9143 if (cum->bnds_in_bt)
9144 arg = NULL;
9145 /* Return the next available bound reg if any. */
9146 else if (cum->bnd_regno <= LAST_BND_REG)
9147 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
9148 /* Return the next special slot number otherwise. */
9149 else
9150 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
9151
9152 return arg;
9153 }
9154
9155 if (mode == BLKmode)
9156 bytes = int_size_in_bytes (type);
9157 else
9158 bytes = GET_MODE_SIZE (mode);
9159 words = CEIL (bytes, UNITS_PER_WORD);
9160
9161 /* To simplify the code below, represent vector types with a vector mode
9162 even if MMX/SSE are not active. */
9163 if (type && TREE_CODE (type) == VECTOR_TYPE)
9164 mode = type_natural_mode (type, cum, false);
9165
9166 if (TARGET_64BIT)
9167 {
9168 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9169
9170 if (call_abi == MS_ABI)
9171 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
9172 else
9173 arg = function_arg_64 (cum, mode, omode, type, named);
9174 }
9175 else
9176 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
9177
9178 return arg;
9179 }
9180
9181 /* A C expression that indicates when an argument must be passed by
9182 reference. If nonzero for an argument, a copy of that argument is
9183 made in memory and a pointer to the argument is passed instead of
9184 the argument itself. The pointer is passed in whatever way is
9185 appropriate for passing a pointer to that type. */
9186
9187 static bool
9188 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
9189 const_tree type, bool)
9190 {
9191 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9192
9193 /* Bounds are never passed by reference. */
9194 if ((type && POINTER_BOUNDS_TYPE_P (type))
9195 || POINTER_BOUNDS_MODE_P (mode))
9196 return false;
9197
9198 if (TARGET_64BIT)
9199 {
9200 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9201
9202 /* See Windows x64 Software Convention. */
9203 if (call_abi == MS_ABI)
9204 {
9205 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
9206
9207 if (type)
9208 {
9209 /* Arrays are passed by reference. */
9210 if (TREE_CODE (type) == ARRAY_TYPE)
9211 return true;
9212
9213 if (RECORD_OR_UNION_TYPE_P (type))
9214 {
9215 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
9216 are passed by reference. */
9217 msize = int_size_in_bytes (type);
9218 }
9219 }
9220
9221 /* __m128 is passed by reference. */
9222 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
9223 }
9224 else if (type && int_size_in_bytes (type) == -1)
9225 return true;
9226 }
9227
9228 return false;
9229 }
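
/* For example, under the MS ABI a 12-byte struct is passed by reference
   (its size is not 1, 2, 4 or 8 bytes) while an 8-byte struct is passed by
   value in a register; under the SysV 64-bit ABI only variable-sized types
   are forced by this hook to be passed by reference.  */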
9230
9231 /* Return true when TYPE should be 128bit aligned for 32bit argument
9232 passing ABI. XXX: This function is obsolete and is only used for
9233 checking psABI compatibility with previous versions of GCC. */
9234
9235 static bool
9236 ix86_compat_aligned_value_p (const_tree type)
9237 {
9238 machine_mode mode = TYPE_MODE (type);
9239 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9240 || mode == TDmode
9241 || mode == TFmode
9242 || mode == TCmode)
9243 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9244 return true;
9245 if (TYPE_ALIGN (type) < 128)
9246 return false;
9247
9248 if (AGGREGATE_TYPE_P (type))
9249 {
9250 /* Walk the aggregates recursively. */
9251 switch (TREE_CODE (type))
9252 {
9253 case RECORD_TYPE:
9254 case UNION_TYPE:
9255 case QUAL_UNION_TYPE:
9256 {
9257 tree field;
9258
9259 /* Walk all the structure fields. */
9260 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9261 {
9262 if (TREE_CODE (field) == FIELD_DECL
9263 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9264 return true;
9265 }
9266 break;
9267 }
9268
9269 case ARRAY_TYPE:
9270 /* Just in case some languages pass arrays by value. */
9271 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9272 return true;
9273 break;
9274
9275 default:
9276 gcc_unreachable ();
9277 }
9278 }
9279 return false;
9280 }
9281
9282 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9283 XXX: This function is obsolete and is only used for checking psABI
9284 compatibility with previous versions of GCC. */
9285
9286 static unsigned int
9287 ix86_compat_function_arg_boundary (machine_mode mode,
9288 const_tree type, unsigned int align)
9289 {
9290 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9291 natural boundaries. */
9292 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9293 {
9294 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9295 make an exception for SSE modes since these require 128bit
9296 alignment.
9297
9298 The handling here differs from field_alignment. ICC aligns MMX
9299 arguments to 4 byte boundaries, while structure fields are aligned
9300 to 8 byte boundaries. */
9301 if (!type)
9302 {
9303 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9304 align = PARM_BOUNDARY;
9305 }
9306 else
9307 {
9308 if (!ix86_compat_aligned_value_p (type))
9309 align = PARM_BOUNDARY;
9310 }
9311 }
9312 if (align > BIGGEST_ALIGNMENT)
9313 align = BIGGEST_ALIGNMENT;
9314 return align;
9315 }
9316
9317 /* Return true when TYPE should be 128bit aligned for 32bit argument
9318 passing ABI. */
9319
9320 static bool
9321 ix86_contains_aligned_value_p (const_tree type)
9322 {
9323 machine_mode mode = TYPE_MODE (type);
9324
9325 if (mode == XFmode || mode == XCmode)
9326 return false;
9327
9328 if (TYPE_ALIGN (type) < 128)
9329 return false;
9330
9331 if (AGGREGATE_TYPE_P (type))
9332 {
9333 /* Walk the aggregates recursively. */
9334 switch (TREE_CODE (type))
9335 {
9336 case RECORD_TYPE:
9337 case UNION_TYPE:
9338 case QUAL_UNION_TYPE:
9339 {
9340 tree field;
9341
9342 /* Walk all the structure fields. */
9343 for (field = TYPE_FIELDS (type);
9344 field;
9345 field = DECL_CHAIN (field))
9346 {
9347 if (TREE_CODE (field) == FIELD_DECL
9348 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9349 return true;
9350 }
9351 break;
9352 }
9353
9354 case ARRAY_TYPE:
9355 /* Just in case some languages pass arrays by value. */
9356 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
9357 return true;
9358 break;
9359
9360 default:
9361 gcc_unreachable ();
9362 }
9363 }
9364 else
9365 return TYPE_ALIGN (type) >= 128;
9366
9367 return false;
9368 }
9369
9370 /* Gives the alignment boundary, in bits, of an argument with the
9371 specified mode and type. */
9372
9373 static unsigned int
9374 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9375 {
9376 unsigned int align;
9377 if (type)
9378 {
9379 /* Since the main variant type is used for the call, convert TYPE
9380 to its main variant. */
9381 type = TYPE_MAIN_VARIANT (type);
9382 align = TYPE_ALIGN (type);
9383 }
9384 else
9385 align = GET_MODE_ALIGNMENT (mode);
9386 if (align < PARM_BOUNDARY)
9387 align = PARM_BOUNDARY;
9388 else
9389 {
9390 static bool warned;
9391 unsigned int saved_align = align;
9392
9393 if (!TARGET_64BIT)
9394 {
9395 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9396 if (!type)
9397 {
9398 if (mode == XFmode || mode == XCmode)
9399 align = PARM_BOUNDARY;
9400 }
9401 else if (!ix86_contains_aligned_value_p (type))
9402 align = PARM_BOUNDARY;
9403
9404 if (align < 128)
9405 align = PARM_BOUNDARY;
9406 }
9407
9408 if (warn_psabi
9409 && !warned
9410 && align != ix86_compat_function_arg_boundary (mode, type,
9411 saved_align))
9412 {
9413 warned = true;
9414 inform (input_location,
9415 "The ABI for passing parameters with %d-byte"
9416 " alignment has changed in GCC 4.6",
9417 align / BITS_PER_UNIT);
9418 }
9419 }
9420
9421 return align;
9422 }
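
/* As a rough illustration: on ia32 a plain double argument ends up with
   PARM_BOUNDARY (32-bit) alignment, while a 16-byte-aligned type such as
   __m128 keeps its 128-bit boundary; on x86-64 the natural alignment of the
   type or mode is used, but never less than PARM_BOUNDARY.  */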
9423
9424 /* Return true if N is a possible register number of function value. */
9425
9426 static bool
9427 ix86_function_value_regno_p (const unsigned int regno)
9428 {
9429 switch (regno)
9430 {
9431 case AX_REG:
9432 return true;
9433 case DX_REG:
9434 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9435 case DI_REG:
9436 case SI_REG:
9437 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
9438
9439 case BND0_REG:
9440 case BND1_REG:
9441 return chkp_function_instrumented_p (current_function_decl);
9442
9443 /* Complex values are returned in %st(0)/%st(1) pair. */
9444 case ST0_REG:
9445 case ST1_REG:
9446 /* TODO: The function should depend on current function ABI but
9447 builtins.c would need updating then. Therefore we use the
9448 default ABI. */
9449 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9450 return false;
9451 return TARGET_FLOAT_RETURNS_IN_80387;
9452
9453 /* Complex values are returned in %xmm0/%xmm1 pair. */
9454 case XMM0_REG:
9455 case XMM1_REG:
9456 return TARGET_SSE;
9457
9458 case MM0_REG:
9459 if (TARGET_MACHO || TARGET_64BIT)
9460 return false;
9461 return TARGET_MMX;
9462 }
9463
9464 return false;
9465 }
9466
9467 /* Define how to find the value returned by a function.
9468 VALTYPE is the data type of the value (as a tree).
9469 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9470 otherwise, FUNC is 0. */
9471
9472 static rtx
9473 function_value_32 (machine_mode orig_mode, machine_mode mode,
9474 const_tree fntype, const_tree fn)
9475 {
9476 unsigned int regno;
9477
9478 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9479 we normally prevent this case when mmx is not available. However
9480 some ABIs may require the result to be returned like DImode. */
9481 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9482 regno = FIRST_MMX_REG;
9483
9484 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9485 we prevent this case when sse is not available. However some ABIs
9486 may require the result to be returned like integer TImode. */
9487 else if (mode == TImode
9488 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9489 regno = FIRST_SSE_REG;
9490
9491 /* 32-byte vector modes in %ymm0. */
9492 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9493 regno = FIRST_SSE_REG;
9494
9495 /* 64-byte vector modes in %zmm0. */
9496 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9497 regno = FIRST_SSE_REG;
9498
9499 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9500 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9501 regno = FIRST_FLOAT_REG;
9502 else
9503 /* Most things go in %eax. */
9504 regno = AX_REG;
9505
9506 /* Override FP return register with %xmm0 for local functions when
9507 SSE math is enabled or for functions with sseregparm attribute. */
9508 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9509 {
9510 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9511 if (sse_level == -1)
9512 {
9513 error ("calling %qD with SSE caling convention without "
9514 "SSE/SSE2 enabled", fn);
9515 sorry ("this is a GCC bug that can be worked around by adding "
9516 "attribute used to function called");
9517 }
9518 else if ((sse_level >= 1 && mode == SFmode)
9519 || (sse_level == 2 && mode == DFmode))
9520 regno = FIRST_SSE_REG;
9521 }
9522
9523 /* OImode shouldn't be used directly. */
9524 gcc_assert (mode != OImode);
9525
9526 return gen_rtx_REG (orig_mode, regno);
9527 }
9528
9529 static rtx
9530 function_value_64 (machine_mode orig_mode, machine_mode mode,
9531 const_tree valtype)
9532 {
9533 rtx ret;
9534
9535 /* Handle libcalls, which don't provide a type node. */
9536 if (valtype == NULL)
9537 {
9538 unsigned int regno;
9539
9540 switch (mode)
9541 {
9542 case SFmode:
9543 case SCmode:
9544 case DFmode:
9545 case DCmode:
9546 case TFmode:
9547 case SDmode:
9548 case DDmode:
9549 case TDmode:
9550 regno = FIRST_SSE_REG;
9551 break;
9552 case XFmode:
9553 case XCmode:
9554 regno = FIRST_FLOAT_REG;
9555 break;
9556 case TCmode:
9557 return NULL;
9558 default:
9559 regno = AX_REG;
9560 }
9561
9562 return gen_rtx_REG (mode, regno);
9563 }
9564 else if (POINTER_TYPE_P (valtype))
9565 {
9566 /* Pointers are always returned in word_mode. */
9567 mode = word_mode;
9568 }
9569
9570 ret = construct_container (mode, orig_mode, valtype, 1,
9571 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9572 x86_64_int_return_registers, 0);
9573
9574 /* For zero-sized structures, construct_container returns NULL, but we
9575 need to keep the rest of the compiler happy by returning a meaningful value. */
9576 if (!ret)
9577 ret = gen_rtx_REG (orig_mode, AX_REG);
9578
9579 return ret;
9580 }
9581
9582 static rtx
9583 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9584 const_tree valtype)
9585 {
9586 unsigned int regno = AX_REG;
9587
9588 if (TARGET_SSE)
9589 {
9590 switch (GET_MODE_SIZE (mode))
9591 {
9592 case 16:
9593 if (valtype != NULL_TREE
9594 && !VECTOR_INTEGER_TYPE_P (valtype)
9595 && !VECTOR_INTEGER_TYPE_P (valtype)
9596 && !INTEGRAL_TYPE_P (valtype)
9597 && !VECTOR_FLOAT_TYPE_P (valtype))
9598 break;
9599 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9600 && !COMPLEX_MODE_P (mode))
9601 regno = FIRST_SSE_REG;
9602 break;
9603 case 8:
9604 case 4:
9605 if (mode == SFmode || mode == DFmode)
9606 regno = FIRST_SSE_REG;
9607 break;
9608 default:
9609 break;
9610 }
9611 }
9612 return gen_rtx_REG (orig_mode, regno);
9613 }
9614
9615 static rtx
9616 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9617 machine_mode orig_mode, machine_mode mode)
9618 {
9619 const_tree fn, fntype;
9620
9621 fn = NULL_TREE;
9622 if (fntype_or_decl && DECL_P (fntype_or_decl))
9623 fn = fntype_or_decl;
9624 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9625
9626 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9627 || POINTER_BOUNDS_MODE_P (mode))
9628 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9629 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9630 return function_value_ms_64 (orig_mode, mode, valtype);
9631 else if (TARGET_64BIT)
9632 return function_value_64 (orig_mode, mode, valtype);
9633 else
9634 return function_value_32 (orig_mode, mode, fntype, fn);
9635 }
9636
9637 static rtx
9638 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9639 {
9640 machine_mode mode, orig_mode;
9641
9642 orig_mode = TYPE_MODE (valtype);
9643 mode = type_natural_mode (valtype, NULL, true);
9644 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9645 }
9646
9647 /* Return an RTX representing a place where a function returns
9648 or receives pointer bounds, or NULL if no bounds are returned.
9649
9650 VALTYPE is a data type of a value returned by the function.
9651
9652 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9653 or FUNCTION_TYPE of the function.
9654
9655 If OUTGOING is false, return a place in which the caller will
9656 see the return value. Otherwise, return a place where a
9657 function returns a value. */
9658
9659 static rtx
9660 ix86_function_value_bounds (const_tree valtype,
9661 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9662 bool outgoing ATTRIBUTE_UNUSED)
9663 {
9664 rtx res = NULL_RTX;
9665
9666 if (BOUNDED_TYPE_P (valtype))
9667 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9668 else if (chkp_type_has_pointer (valtype))
9669 {
9670 bitmap slots;
9671 rtx bounds[2];
9672 bitmap_iterator bi;
9673 unsigned i, bnd_no = 0;
9674
9675 bitmap_obstack_initialize (NULL);
9676 slots = BITMAP_ALLOC (NULL);
9677 chkp_find_bound_slots (valtype, slots);
9678
9679 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9680 {
9681 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9682 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9683 gcc_assert (bnd_no < 2);
9684 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9685 }
9686
9687 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9688
9689 BITMAP_FREE (slots);
9690 bitmap_obstack_release (NULL);
9691 }
9692 else
9693 res = NULL_RTX;
9694
9695 return res;
9696 }
9697
9698 /* Pointer function arguments and return values are promoted to
9699 word_mode. */
9700
9701 static machine_mode
9702 ix86_promote_function_mode (const_tree type, machine_mode mode,
9703 int *punsignedp, const_tree fntype,
9704 int for_return)
9705 {
9706 if (type != NULL_TREE && POINTER_TYPE_P (type))
9707 {
9708 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9709 return word_mode;
9710 }
9711 return default_promote_function_mode (type, mode, punsignedp, fntype,
9712 for_return);
9713 }
9714
9715 /* Return true if a structure, union or array with MODE containing FIELD
9716 should be accessed using BLKmode. */
9717
9718 static bool
9719 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9720 {
9721 /* Union with XFmode must be in BLKmode. */
9722 return (mode == XFmode
9723 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9724 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9725 }
9726
9727 rtx
9728 ix86_libcall_value (machine_mode mode)
9729 {
9730 return ix86_function_value_1 (NULL, NULL, mode, mode);
9731 }
9732
9733 /* Return true iff type is returned in memory. */
9734
9735 static bool
9736 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9737 {
9738 #ifdef SUBTARGET_RETURN_IN_MEMORY
9739 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9740 #else
9741 const machine_mode mode = type_natural_mode (type, NULL, true);
9742 HOST_WIDE_INT size;
9743
9744 if (POINTER_BOUNDS_TYPE_P (type))
9745 return false;
9746
9747 if (TARGET_64BIT)
9748 {
9749 if (ix86_function_type_abi (fntype) == MS_ABI)
9750 {
9751 size = int_size_in_bytes (type);
9752
9753 /* __m128 is returned in xmm0. */
9754 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9755 || INTEGRAL_TYPE_P (type)
9756 || VECTOR_FLOAT_TYPE_P (type))
9757 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9758 && !COMPLEX_MODE_P (mode)
9759 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9760 return false;
9761
9762 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
9763 return size != 1 && size != 2 && size != 4 && size != 8;
9764 }
9765 else
9766 {
9767 int needed_intregs, needed_sseregs;
9768
9769 return examine_argument (mode, type, 1,
9770 &needed_intregs, &needed_sseregs);
9771 }
9772 }
9773 else
9774 {
9775 size = int_size_in_bytes (type);
9776
9777 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9778 bytes in registers. */
9779 if (TARGET_IAMCU)
9780 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9781
9782 if (mode == BLKmode)
9783 return true;
9784
9785 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9786 return false;
9787
9788 if (VECTOR_MODE_P (mode) || mode == TImode)
9789 {
9790 /* User-created vectors small enough to fit in EAX. */
9791 if (size < 8)
9792 return false;
9793
9794 /* Unless the ABI prescribes otherwise,
9795 MMX/3dNow values are returned in MM0 if available. */
9796
9797 if (size == 8)
9798 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9799
9800 /* SSE values are returned in XMM0 if available. */
9801 if (size == 16)
9802 return !TARGET_SSE;
9803
9804 /* AVX values are returned in YMM0 if available. */
9805 if (size == 32)
9806 return !TARGET_AVX;
9807
9808 /* AVX512F values are returned in ZMM0 if available. */
9809 if (size == 64)
9810 return !TARGET_AVX512F;
9811 }
9812
9813 if (mode == XFmode)
9814 return false;
9815
9816 if (size > 12)
9817 return true;
9818
9819 /* OImode shouldn't be used directly. */
9820 gcc_assert (mode != OImode);
9821
9822 return false;
9823 }
9824 #endif
9825 }
9826
9827 \f
9828 /* Create the va_list data type. */
9829
9830 static tree
9831 ix86_build_builtin_va_list_64 (void)
9832 {
9833 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9834
9835 record = lang_hooks.types.make_type (RECORD_TYPE);
9836 type_decl = build_decl (BUILTINS_LOCATION,
9837 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9838
9839 f_gpr = build_decl (BUILTINS_LOCATION,
9840 FIELD_DECL, get_identifier ("gp_offset"),
9841 unsigned_type_node);
9842 f_fpr = build_decl (BUILTINS_LOCATION,
9843 FIELD_DECL, get_identifier ("fp_offset"),
9844 unsigned_type_node);
9845 f_ovf = build_decl (BUILTINS_LOCATION,
9846 FIELD_DECL, get_identifier ("overflow_arg_area"),
9847 ptr_type_node);
9848 f_sav = build_decl (BUILTINS_LOCATION,
9849 FIELD_DECL, get_identifier ("reg_save_area"),
9850 ptr_type_node);
9851
9852 va_list_gpr_counter_field = f_gpr;
9853 va_list_fpr_counter_field = f_fpr;
9854
9855 DECL_FIELD_CONTEXT (f_gpr) = record;
9856 DECL_FIELD_CONTEXT (f_fpr) = record;
9857 DECL_FIELD_CONTEXT (f_ovf) = record;
9858 DECL_FIELD_CONTEXT (f_sav) = record;
9859
9860 TYPE_STUB_DECL (record) = type_decl;
9861 TYPE_NAME (record) = type_decl;
9862 TYPE_FIELDS (record) = f_gpr;
9863 DECL_CHAIN (f_gpr) = f_fpr;
9864 DECL_CHAIN (f_fpr) = f_ovf;
9865 DECL_CHAIN (f_ovf) = f_sav;
9866
9867 layout_type (record);
9868
9869 /* The correct type is an array type of one element. */
9870 return build_array_type (record, build_index_type (size_zero_node));
9871 }
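
/* The record built above corresponds to the psABI declaration

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   where the array-of-one-element wrapper is added by build_array_type at
   the end.  */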
9872
9873 /* Set up the builtin va_list data type and, for 64-bit, the additional
9874 calling-convention-specific va_list data types. */
9875
9876 static tree
9877 ix86_build_builtin_va_list (void)
9878 {
9879 if (TARGET_64BIT)
9880 {
9881 /* Initialize ABI specific va_list builtin types. */
9882 tree sysv_va_list, ms_va_list;
9883
9884 sysv_va_list = ix86_build_builtin_va_list_64 ();
9885 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
9886
9887 /* For MS_ABI we use plain pointer to argument area. */
9888 ms_va_list = build_pointer_type (char_type_node);
9889 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
9890
9891 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
9892 }
9893 else
9894 {
9895 /* For i386 we use plain pointer to argument area. */
9896 return build_pointer_type (char_type_node);
9897 }
9898 }
9899
9900 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9901
9902 static void
9903 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9904 {
9905 rtx save_area, mem;
9906 alias_set_type set;
9907 int i, max;
9908
9909 /* GPR size of varargs save area. */
9910 if (cfun->va_list_gpr_size)
9911 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9912 else
9913 ix86_varargs_gpr_size = 0;
9914
9915 /* FPR size of varargs save area. We don't need it if we don't pass
9916 anything in SSE registers. */
9917 if (TARGET_SSE && cfun->va_list_fpr_size)
9918 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9919 else
9920 ix86_varargs_fpr_size = 0;
9921
9922 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9923 return;
9924
9925 save_area = frame_pointer_rtx;
9926 set = get_varargs_alias_set ();
9927
9928 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9929 if (max > X86_64_REGPARM_MAX)
9930 max = X86_64_REGPARM_MAX;
9931
9932 for (i = cum->regno; i < max; i++)
9933 {
9934 mem = gen_rtx_MEM (word_mode,
9935 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9936 MEM_NOTRAP_P (mem) = 1;
9937 set_mem_alias_set (mem, set);
9938 emit_move_insn (mem,
9939 gen_rtx_REG (word_mode,
9940 x86_64_int_parameter_registers[i]));
9941 }
9942
9943 if (ix86_varargs_fpr_size)
9944 {
9945 machine_mode smode;
9946 rtx_code_label *label;
9947 rtx test;
9948
9949 /* Now emit code to save SSE registers. The AX parameter contains number
9950 of SSE parameter registers used to call this function, though all we
9951 actually check here is the zero/non-zero status. */
9952
9953 label = gen_label_rtx ();
9954 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9955 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9956 label));
9957
9958 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9959 we used movdqa (i.e. TImode) instead? Perhaps even better would
9960 be if we could determine the real mode of the data, via a hook
9961 into pass_stdarg. Ignore all that for now. */
9962 smode = V4SFmode;
9963 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9964 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9965
9966 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9967 if (max > X86_64_SSE_REGPARM_MAX)
9968 max = X86_64_SSE_REGPARM_MAX;
9969
9970 for (i = cum->sse_regno; i < max; ++i)
9971 {
9972 mem = plus_constant (Pmode, save_area,
9973 i * 16 + ix86_varargs_gpr_size);
9974 mem = gen_rtx_MEM (smode, mem);
9975 MEM_NOTRAP_P (mem) = 1;
9976 set_mem_alias_set (mem, set);
9977 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9978
9979 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9980 }
9981
9982 emit_label (label);
9983 }
9984 }
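
/* The register save area laid out above (relative to frame_pointer_rtx)
   holds, roughly, one word per still-unconsumed integer argument register
   at offsets i * UNITS_PER_WORD, followed, when SSE is enabled, by one
   16-byte slot per SSE argument register starting at ix86_varargs_gpr_size.
   va_arg later indexes into this block using the gp_offset and fp_offset
   fields of the va_list.  */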
9985
9986 static void
9987 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9988 {
9989 alias_set_type set = get_varargs_alias_set ();
9990 int i;
9991
9992 /* Reset to zero, as a sysv va_arg may have been
9993 used before. */
9994 ix86_varargs_gpr_size = 0;
9995 ix86_varargs_fpr_size = 0;
9996
9997 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9998 {
9999 rtx reg, mem;
10000
10001 mem = gen_rtx_MEM (Pmode,
10002 plus_constant (Pmode, virtual_incoming_args_rtx,
10003 i * UNITS_PER_WORD));
10004 MEM_NOTRAP_P (mem) = 1;
10005 set_mem_alias_set (mem, set);
10006
10007 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
10008 emit_move_insn (mem, reg);
10009 }
10010 }
10011
10012 static void
10013 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
10014 tree type, int *, int no_rtl)
10015 {
10016 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10017 CUMULATIVE_ARGS next_cum;
10018 tree fntype;
10019
10020 /* This argument doesn't appear to be used anymore. That is good,
10021 because the old code here didn't suppress rtl generation. */
10022 gcc_assert (!no_rtl);
10023
10024 if (!TARGET_64BIT)
10025 return;
10026
10027 fntype = TREE_TYPE (current_function_decl);
10028
10029 /* For varargs, we do not want to skip the dummy va_dcl argument.
10030 For stdargs, we do want to skip the last named argument. */
10031 next_cum = *cum;
10032 if (stdarg_p (fntype))
10033 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10034 true);
10035
10036 if (cum->call_abi == MS_ABI)
10037 setup_incoming_varargs_ms_64 (&next_cum);
10038 else
10039 setup_incoming_varargs_64 (&next_cum);
10040 }
10041
10042 static void
10043 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
10044 enum machine_mode mode,
10045 tree type,
10046 int *pretend_size ATTRIBUTE_UNUSED,
10047 int no_rtl)
10048 {
10049 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10050 CUMULATIVE_ARGS next_cum;
10051 tree fntype;
10052 rtx save_area;
10053 int bnd_reg, i, max;
10054
10055 gcc_assert (!no_rtl);
10056
10057 /* Do nothing if we use a plain pointer to the argument area. */
10058 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
10059 return;
10060
10061 fntype = TREE_TYPE (current_function_decl);
10062
10063 /* For varargs, we do not want to skip the dummy va_dcl argument.
10064 For stdargs, we do want to skip the last named argument. */
10065 next_cum = *cum;
10066 if (stdarg_p (fntype))
10067 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10068 true);
10069 save_area = frame_pointer_rtx;
10070
10071 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10072 if (max > X86_64_REGPARM_MAX)
10073 max = X86_64_REGPARM_MAX;
10074
10075 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
10076 if (chkp_function_instrumented_p (current_function_decl))
10077 for (i = cum->regno; i < max; i++)
10078 {
10079 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
10080 rtx ptr = gen_rtx_REG (Pmode,
10081 x86_64_int_parameter_registers[i]);
10082 rtx bounds;
10083
10084 if (bnd_reg <= LAST_BND_REG)
10085 bounds = gen_rtx_REG (BNDmode, bnd_reg);
10086 else
10087 {
10088 rtx ldx_addr =
10089 plus_constant (Pmode, arg_pointer_rtx,
10090 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
10091 bounds = gen_reg_rtx (BNDmode);
10092 emit_insn (BNDmode == BND64mode
10093 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
10094 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
10095 }
10096
10097 emit_insn (BNDmode == BND64mode
10098 ? gen_bnd64_stx (addr, ptr, bounds)
10099 : gen_bnd32_stx (addr, ptr, bounds));
10100
10101 bnd_reg++;
10102 }
10103 }
10104
10105
10106 /* Check whether TYPE is a va_list kind that is a plain char pointer. */
10107
10108 static bool
10109 is_va_list_char_pointer (tree type)
10110 {
10111 tree canonic;
10112
10113 /* For 32-bit it is always true. */
10114 if (!TARGET_64BIT)
10115 return true;
10116 canonic = ix86_canonical_va_list_type (type);
10117 return (canonic == ms_va_list_type_node
10118 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
10119 }
10120
10121 /* Implement va_start. */
10122
10123 static void
10124 ix86_va_start (tree valist, rtx nextarg)
10125 {
10126 HOST_WIDE_INT words, n_gpr, n_fpr;
10127 tree f_gpr, f_fpr, f_ovf, f_sav;
10128 tree gpr, fpr, ovf, sav, t;
10129 tree type;
10130 rtx ovf_rtx;
10131
10132 if (flag_split_stack
10133 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10134 {
10135 unsigned int scratch_regno;
10136
10137 /* When we are splitting the stack, we can't refer to the stack
10138 arguments using internal_arg_pointer, because they may be on
10139 the old stack. The split stack prologue will arrange to
10140 leave a pointer to the old stack arguments in a scratch
10141 register, which we here copy to a pseudo-register. The split
10142 stack prologue can't set the pseudo-register directly because
10143 it (the prologue) runs before any registers have been saved. */
10144
10145 scratch_regno = split_stack_prologue_scratch_regno ();
10146 if (scratch_regno != INVALID_REGNUM)
10147 {
10148 rtx reg;
10149 rtx_insn *seq;
10150
10151 reg = gen_reg_rtx (Pmode);
10152 cfun->machine->split_stack_varargs_pointer = reg;
10153
10154 start_sequence ();
10155 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
10156 seq = get_insns ();
10157 end_sequence ();
10158
10159 push_topmost_sequence ();
10160 emit_insn_after (seq, entry_of_function ());
10161 pop_topmost_sequence ();
10162 }
10163 }
10164
10165 /* Only the 64-bit target needs something special. */
10166 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10167 {
10168 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10169 std_expand_builtin_va_start (valist, nextarg);
10170 else
10171 {
10172 rtx va_r, next;
10173
10174 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
10175 next = expand_binop (ptr_mode, add_optab,
10176 cfun->machine->split_stack_varargs_pointer,
10177 crtl->args.arg_offset_rtx,
10178 NULL_RTX, 0, OPTAB_LIB_WIDEN);
10179 convert_move (va_r, next, 0);
10180
10181 /* Store zero bounds for va_list. */
10182 if (chkp_function_instrumented_p (current_function_decl))
10183 chkp_expand_bounds_reset_for_mem (valist,
10184 make_tree (TREE_TYPE (valist),
10185 next));
10186
10187 }
10188 return;
10189 }
10190
10191 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10192 f_fpr = DECL_CHAIN (f_gpr);
10193 f_ovf = DECL_CHAIN (f_fpr);
10194 f_sav = DECL_CHAIN (f_ovf);
10195
10196 valist = build_simple_mem_ref (valist);
10197 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
10198 /* The following should be folded into the MEM_REF offset. */
10199 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10200 f_gpr, NULL_TREE);
10201 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10202 f_fpr, NULL_TREE);
10203 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10204 f_ovf, NULL_TREE);
10205 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10206 f_sav, NULL_TREE);
10207
10208 /* Count number of gp and fp argument registers used. */
10209 words = crtl->args.info.words;
10210 n_gpr = crtl->args.info.regno;
10211 n_fpr = crtl->args.info.sse_regno;
10212
10213 if (cfun->va_list_gpr_size)
10214 {
10215 type = TREE_TYPE (gpr);
10216 t = build2 (MODIFY_EXPR, type,
10217 gpr, build_int_cst (type, n_gpr * 8));
10218 TREE_SIDE_EFFECTS (t) = 1;
10219 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10220 }
10221
10222 if (TARGET_SSE && cfun->va_list_fpr_size)
10223 {
10224 type = TREE_TYPE (fpr);
10225 t = build2 (MODIFY_EXPR, type, fpr,
10226 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10227 TREE_SIDE_EFFECTS (t) = 1;
10228 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10229 }
10230
10231 /* Find the overflow area. */
10232 type = TREE_TYPE (ovf);
10233 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10234 ovf_rtx = crtl->args.internal_arg_pointer;
10235 else
10236 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10237 t = make_tree (type, ovf_rtx);
10238 if (words != 0)
10239 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10240
10241 /* Store zero bounds for overflow area pointer. */
10242 if (chkp_function_instrumented_p (current_function_decl))
10243 chkp_expand_bounds_reset_for_mem (ovf, t);
10244
10245 t = build2 (MODIFY_EXPR, type, ovf, t);
10246 TREE_SIDE_EFFECTS (t) = 1;
10247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10248
10249 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10250 {
10251 /* Find the register save area.
10252 The function prologue saves it right above the stack frame. */
10253 type = TREE_TYPE (sav);
10254 t = make_tree (type, frame_pointer_rtx);
10255 if (!ix86_varargs_gpr_size)
10256 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10257
10258 /* Store zero bounds for save area pointer. */
10259 if (chkp_function_instrumented_p (current_function_decl))
10260 chkp_expand_bounds_reset_for_mem (sav, t);
10261
10262 t = build2 (MODIFY_EXPR, type, sav, t);
10263 TREE_SIDE_EFFECTS (t) = 1;
10264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10265 }
10266 }
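
/* As a hedged illustration of the expansion above: for the SysV x86-64
   va_list

	typedef struct {
	  unsigned int gp_offset;
	  unsigned int fp_offset;
	  void *overflow_arg_area;
	  void *reg_save_area;
	} __va_list[1];

   va_start in a function that has already consumed N_GPR integer and N_FPR
   SSE parameter registers is expanded roughly as

	gp_offset = N_GPR * 8;
	fp_offset = X86_64_REGPARM_MAX * 8 + N_FPR * 16;
	overflow_arg_area = incoming arg pointer + WORDS * UNITS_PER_WORD;
	reg_save_area = start of the register save area (frame pointer based);

   The struct layout shown is the standard psABI definition and is given
   here only for orientation.  */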
10267
10268 /* Implement va_arg. */
10269
10270 static tree
10271 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10272 gimple_seq *post_p)
10273 {
10274 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10275 tree f_gpr, f_fpr, f_ovf, f_sav;
10276 tree gpr, fpr, ovf, sav, t;
10277 int size, rsize;
10278 tree lab_false, lab_over = NULL_TREE;
10279 tree addr, t2;
10280 rtx container;
10281 int indirect_p = 0;
10282 tree ptrtype;
10283 machine_mode nat_mode;
10284 unsigned int arg_boundary;
10285
10286 /* Only the 64-bit target needs something special. */
10287 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10288 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10289
10290 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10291 f_fpr = DECL_CHAIN (f_gpr);
10292 f_ovf = DECL_CHAIN (f_fpr);
10293 f_sav = DECL_CHAIN (f_ovf);
10294
10295 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10296 valist, f_gpr, NULL_TREE);
10297
10298 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10299 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10300 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10301
10302 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10303 if (indirect_p)
10304 type = build_pointer_type (type);
10305 size = int_size_in_bytes (type);
10306 rsize = CEIL (size, UNITS_PER_WORD);
10307
10308 nat_mode = type_natural_mode (type, NULL, false);
10309 switch (nat_mode)
10310 {
10311 case V8SFmode:
10312 case V8SImode:
10313 case V32QImode:
10314 case V16HImode:
10315 case V4DFmode:
10316 case V4DImode:
10317 case V16SFmode:
10318 case V16SImode:
10319 case V64QImode:
10320 case V32HImode:
10321 case V8DFmode:
10322 case V8DImode:
10323 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
10324 if (!TARGET_64BIT_MS_ABI)
10325 {
10326 container = NULL;
10327 break;
10328 }
10329
10330 default:
10331 container = construct_container (nat_mode, TYPE_MODE (type),
10332 type, 0, X86_64_REGPARM_MAX,
10333 X86_64_SSE_REGPARM_MAX, intreg,
10334 0);
10335 break;
10336 }
10337
10338 /* Pull the value out of the saved registers. */
10339
10340 addr = create_tmp_var (ptr_type_node, "addr");
10341
10342 if (container)
10343 {
10344 int needed_intregs, needed_sseregs;
10345 bool need_temp;
10346 tree int_addr, sse_addr;
10347
10348 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10349 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10350
10351 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
10352
10353 need_temp = (!REG_P (container)
10354 && ((needed_intregs && TYPE_ALIGN (type) > 64)
10355 || TYPE_ALIGN (type) > 128));
10356
10357 /* In case we are passing a structure, verify that it is a consecutive
10358 block in the register save area. If not, we need to do moves. */
10359 if (!need_temp && !REG_P (container))
10360 {
10361 /* Verify that all registers are strictly consecutive */
10362 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10363 {
10364 int i;
10365
10366 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10367 {
10368 rtx slot = XVECEXP (container, 0, i);
10369 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10370 || INTVAL (XEXP (slot, 1)) != i * 16)
10371 need_temp = true;
10372 }
10373 }
10374 else
10375 {
10376 int i;
10377
10378 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10379 {
10380 rtx slot = XVECEXP (container, 0, i);
10381 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10382 || INTVAL (XEXP (slot, 1)) != i * 8)
10383 need_temp = true;
10384 }
10385 }
10386 }
10387 if (!need_temp)
10388 {
10389 int_addr = addr;
10390 sse_addr = addr;
10391 }
10392 else
10393 {
10394 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10395 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10396 }
10397
10398 /* First ensure that we fit completely in registers. */
10399 if (needed_intregs)
10400 {
10401 t = build_int_cst (TREE_TYPE (gpr),
10402 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10403 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10404 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10405 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10406 gimplify_and_add (t, pre_p);
10407 }
10408 if (needed_sseregs)
10409 {
10410 t = build_int_cst (TREE_TYPE (fpr),
10411 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10412 + X86_64_REGPARM_MAX * 8);
10413 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10414 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10415 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10416 gimplify_and_add (t, pre_p);
10417 }
10418
10419 /* Compute index to start of area used for integer regs. */
10420 if (needed_intregs)
10421 {
10422 /* int_addr = gpr + sav; */
10423 t = fold_build_pointer_plus (sav, gpr);
10424 gimplify_assign (int_addr, t, pre_p);
10425 }
10426 if (needed_sseregs)
10427 {
10428 /* sse_addr = fpr + sav; */
10429 t = fold_build_pointer_plus (sav, fpr);
10430 gimplify_assign (sse_addr, t, pre_p);
10431 }
10432 if (need_temp)
10433 {
10434 int i, prev_size = 0;
10435 tree temp = create_tmp_var (type, "va_arg_tmp");
10436
10437 /* addr = &temp; */
10438 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10439 gimplify_assign (addr, t, pre_p);
10440
10441 for (i = 0; i < XVECLEN (container, 0); i++)
10442 {
10443 rtx slot = XVECEXP (container, 0, i);
10444 rtx reg = XEXP (slot, 0);
10445 machine_mode mode = GET_MODE (reg);
10446 tree piece_type;
10447 tree addr_type;
10448 tree daddr_type;
10449 tree src_addr, src;
10450 int src_offset;
10451 tree dest_addr, dest;
10452 int cur_size = GET_MODE_SIZE (mode);
10453
10454 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10455 prev_size = INTVAL (XEXP (slot, 1));
10456 if (prev_size + cur_size > size)
10457 {
10458 cur_size = size - prev_size;
10459 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
10460 if (mode == BLKmode)
10461 mode = QImode;
10462 }
10463 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10464 if (mode == GET_MODE (reg))
10465 addr_type = build_pointer_type (piece_type);
10466 else
10467 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10468 true);
10469 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10470 true);
10471
10472 if (SSE_REGNO_P (REGNO (reg)))
10473 {
10474 src_addr = sse_addr;
10475 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10476 }
10477 else
10478 {
10479 src_addr = int_addr;
10480 src_offset = REGNO (reg) * 8;
10481 }
10482 src_addr = fold_convert (addr_type, src_addr);
10483 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10484
10485 dest_addr = fold_convert (daddr_type, addr);
10486 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10487 if (cur_size == GET_MODE_SIZE (mode))
10488 {
10489 src = build_va_arg_indirect_ref (src_addr);
10490 dest = build_va_arg_indirect_ref (dest_addr);
10491
10492 gimplify_assign (dest, src, pre_p);
10493 }
10494 else
10495 {
10496 tree copy
10497 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10498 3, dest_addr, src_addr,
10499 size_int (cur_size));
10500 gimplify_and_add (copy, pre_p);
10501 }
10502 prev_size += cur_size;
10503 }
10504 }
10505
10506 if (needed_intregs)
10507 {
10508 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10509 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10510 gimplify_assign (gpr, t, pre_p);
10511 }
10512
10513 if (needed_sseregs)
10514 {
10515 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10516 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10517 gimplify_assign (unshare_expr (fpr), t, pre_p);
10518 }
10519
10520 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10521
10522 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10523 }
10524
10525 /* ... otherwise out of the overflow area. */
10526
10527 /* When we align a parameter on the stack for the caller, if the
10528 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
10529 be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
10530 with the caller. */
10531 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10532 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10533 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10534
10535 /* Care for on-stack alignment if needed. */
10536 if (arg_boundary <= 64 || size == 0)
10537 t = ovf;
10538 else
10539 {
10540 HOST_WIDE_INT align = arg_boundary / 8;
10541 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10542 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10543 build_int_cst (TREE_TYPE (t), -align));
10544 }
10545
10546 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10547 gimplify_assign (addr, t, pre_p);
10548
10549 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10550 gimplify_assign (unshare_expr (ovf), t, pre_p);
10551
10552 if (container)
10553 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10554
10555 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10556 addr = fold_convert (ptrtype, addr);
10557
10558 if (indirect_p)
10559 addr = build_va_arg_indirect_ref (addr);
10560 return build_va_arg_indirect_ref (addr);
10561 }
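
/* Rough sketch of the GIMPLE produced above for "va_arg (ap, int)" on
   SysV x86-64 (names are illustrative, not the actual temporaries):

	if (ap->gp_offset >= 6 * 8) goto lab_false;
	addr = ap->reg_save_area + ap->gp_offset;
	ap->gp_offset += 8;
	goto lab_over;
      lab_false:
	addr = ap->overflow_arg_area;         // no re-alignment needed for int
	ap->overflow_arg_area = addr + 8;
      lab_over:
	result = *(int *) addr;

   Aggregates that need both integer and SSE registers additionally get a
   temporary and the piecewise copies emitted in the need_temp path.  */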
10562 \f
10563 /* Return true if OPNUM's MEM should be matched
10564 in movabs* patterns. */
10565
10566 bool
10567 ix86_check_movabs (rtx insn, int opnum)
10568 {
10569 rtx set, mem;
10570
10571 set = PATTERN (insn);
10572 if (GET_CODE (set) == PARALLEL)
10573 set = XVECEXP (set, 0, 0);
10574 gcc_assert (GET_CODE (set) == SET);
10575 mem = XEXP (set, opnum);
10576 while (SUBREG_P (mem))
10577 mem = SUBREG_REG (mem);
10578 gcc_assert (MEM_P (mem));
10579 return volatile_ok || !MEM_VOLATILE_P (mem);
10580 }
10581
10582 /* Return false if INSN contains a MEM with a non-default address space. */
10583 bool
10584 ix86_check_no_addr_space (rtx insn)
10585 {
10586 subrtx_var_iterator::array_type array;
10587 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10588 {
10589 rtx x = *iter;
10590 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10591 return false;
10592 }
10593 return true;
10594 }
10595 \f
10596 /* Initialize the table of extra 80387 mathematical constants. */
10597
10598 static void
10599 init_ext_80387_constants (void)
10600 {
10601 static const char * cst[5] =
10602 {
10603 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10604 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10605 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10606 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10607 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10608 };
10609 int i;
10610
10611 for (i = 0; i < 5; i++)
10612 {
10613 real_from_string (&ext_80387_constants_table[i], cst[i]);
10614 /* Ensure each constant is rounded to XFmode precision. */
10615 real_convert (&ext_80387_constants_table[i],
10616 XFmode, &ext_80387_constants_table[i]);
10617 }
10618
10619 ext_80387_constants_init = 1;
10620 }
10621
10622 /* Return non-zero if the constant is something that
10623 can be loaded with a special instruction. */
10624
10625 int
10626 standard_80387_constant_p (rtx x)
10627 {
10628 machine_mode mode = GET_MODE (x);
10629
10630 const REAL_VALUE_TYPE *r;
10631
10632 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10633 return -1;
10634
10635 if (x == CONST0_RTX (mode))
10636 return 1;
10637 if (x == CONST1_RTX (mode))
10638 return 2;
10639
10640 r = CONST_DOUBLE_REAL_VALUE (x);
10641
10642 /* For XFmode constants, try to find a special 80387 instruction when
10643 optimizing for size or on those CPUs that benefit from them. */
10644 if (mode == XFmode
10645 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10646 {
10647 int i;
10648
10649 if (! ext_80387_constants_init)
10650 init_ext_80387_constants ();
10651
10652 for (i = 0; i < 5; i++)
10653 if (real_identical (r, &ext_80387_constants_table[i]))
10654 return i + 3;
10655 }
10656
10657 /* A load of the constant -0.0 or -1.0 will be split into an
10658 fldz;fchs or fld1;fchs sequence. */
10659 if (real_isnegzero (r))
10660 return 8;
10661 if (real_identical (r, &dconstm1))
10662 return 9;
10663
10664 return 0;
10665 }
10666
10667 /* Return the opcode of the special instruction to be used to load
10668 the constant X. */
10669
10670 const char *
10671 standard_80387_constant_opcode (rtx x)
10672 {
10673 switch (standard_80387_constant_p (x))
10674 {
10675 case 1:
10676 return "fldz";
10677 case 2:
10678 return "fld1";
10679 case 3:
10680 return "fldlg2";
10681 case 4:
10682 return "fldln2";
10683 case 5:
10684 return "fldl2e";
10685 case 6:
10686 return "fldl2t";
10687 case 7:
10688 return "fldpi";
10689 case 8:
10690 case 9:
10691 return "#";
10692 default:
10693 gcc_unreachable ();
10694 }
10695 }
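
/* For example (illustrative only): a CONST_DOUBLE equal to 1.0 makes
   standard_80387_constant_p return 2 and this function return "fld1",
   an XFmode pi constant yields 7 and "fldpi", and -1.0 yields 9, for
   which "#" is returned because the load is split into fld1;fchs.  */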
10696
10697 /* Return the CONST_DOUBLE representing the 80387 constant that is
10698 loaded by the specified special instruction. The argument IDX
10699 matches the return value from standard_80387_constant_p. */
10700
10701 rtx
10702 standard_80387_constant_rtx (int idx)
10703 {
10704 int i;
10705
10706 if (! ext_80387_constants_init)
10707 init_ext_80387_constants ();
10708
10709 switch (idx)
10710 {
10711 case 3:
10712 case 4:
10713 case 5:
10714 case 6:
10715 case 7:
10716 i = idx - 3;
10717 break;
10718
10719 default:
10720 gcc_unreachable ();
10721 }
10722
10723 return const_double_from_real_value (ext_80387_constants_table[i],
10724 XFmode);
10725 }
10726
10727 /* Return 1 if X is all 0s and 2 if X is all 1s
10728 in a supported SSE/AVX vector mode. */
10729
10730 int
10731 standard_sse_constant_p (rtx x)
10732 {
10733 machine_mode mode;
10734
10735 if (!TARGET_SSE)
10736 return 0;
10737
10738 mode = GET_MODE (x);
10739
10740 if (x == const0_rtx || x == CONST0_RTX (mode))
10741 return 1;
10742 if (vector_all_ones_operand (x, mode))
10743 switch (mode)
10744 {
10745 case V16QImode:
10746 case V8HImode:
10747 case V4SImode:
10748 case V2DImode:
10749 if (TARGET_SSE2)
10750 return 2;
10751 case V32QImode:
10752 case V16HImode:
10753 case V8SImode:
10754 case V4DImode:
10755 if (TARGET_AVX2)
10756 return 2;
10757 case V64QImode:
10758 case V32HImode:
10759 case V16SImode:
10760 case V8DImode:
10761 if (TARGET_AVX512F)
10762 return 2;
10763 default:
10764 break;
10765 }
10766
10767 return 0;
10768 }
10769
10770 /* Return the opcode of the special instruction to be used to load
10771 the constant X. */
10772
10773 const char *
10774 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10775 {
10776 switch (standard_sse_constant_p (x))
10777 {
10778 case 1:
10779 switch (get_attr_mode (insn))
10780 {
10781 case MODE_XI:
10782 return "vpxord\t%g0, %g0, %g0";
10783 case MODE_V16SF:
10784 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
10785 : "vpxord\t%g0, %g0, %g0";
10786 case MODE_V8DF:
10787 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
10788 : "vpxorq\t%g0, %g0, %g0";
10789 case MODE_TI:
10790 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
10791 : "%vpxor\t%0, %d0";
10792 case MODE_V2DF:
10793 return "%vxorpd\t%0, %d0";
10794 case MODE_V4SF:
10795 return "%vxorps\t%0, %d0";
10796
10797 case MODE_OI:
10798 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
10799 : "vpxor\t%x0, %x0, %x0";
10800 case MODE_V4DF:
10801 return "vxorpd\t%x0, %x0, %x0";
10802 case MODE_V8SF:
10803 return "vxorps\t%x0, %x0, %x0";
10804
10805 default:
10806 break;
10807 }
10808
10809 case 2:
10810 if (TARGET_AVX512VL
10811 || get_attr_mode (insn) == MODE_XI
10812 || get_attr_mode (insn) == MODE_V8DF
10813 || get_attr_mode (insn) == MODE_V16SF)
10814 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10815 if (TARGET_AVX)
10816 return "vpcmpeqd\t%0, %0, %0";
10817 else
10818 return "pcmpeqd\t%0, %0";
10819
10820 default:
10821 break;
10822 }
10823 gcc_unreachable ();
10824 }
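
/* Illustrative examples (assuming the insn's mode attribute matches the
   operand): CONST0_RTX (V4SFmode) is classified as 1 above and loads with
   "%vxorps\t%0, %d0", while an all-ones V4SImode vector is classified as 2
   and loads with "pcmpeqd\t%0, %0" (or the VEX/EVEX forms when AVX or
   AVX512VL is enabled).  */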
10825
10826 /* Return true if OP contains a symbol reference. */
10827
10828 bool
10829 symbolic_reference_mentioned_p (rtx op)
10830 {
10831 const char *fmt;
10832 int i;
10833
10834 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10835 return true;
10836
10837 fmt = GET_RTX_FORMAT (GET_CODE (op));
10838 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10839 {
10840 if (fmt[i] == 'E')
10841 {
10842 int j;
10843
10844 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10845 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10846 return true;
10847 }
10848
10849 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10850 return true;
10851 }
10852
10853 return false;
10854 }
10855
10856 /* Return true if it is appropriate to emit `ret' instructions in the
10857 body of a function. Do this only if the epilogue is simple, needing a
10858 couple of insns. Prior to reloading, we can't tell how many registers
10859 must be saved, so return false then. Return false if there is no frame
10860 marker to de-allocate. */
10861
10862 bool
10863 ix86_can_use_return_insn_p (void)
10864 {
10865 struct ix86_frame frame;
10866
10867 if (! reload_completed || frame_pointer_needed)
10868 return 0;
10869
10870 /* Don't allow popping more than 32k of arguments, since that's all we
10871 can do with one instruction. */
10872 if (crtl->args.pops_args && crtl->args.size >= 32768)
10873 return 0;
10874
10875 ix86_compute_frame_layout (&frame);
10876 return (frame.stack_pointer_offset == UNITS_PER_WORD
10877 && (frame.nregs + frame.nsseregs) == 0);
10878 }
10879 \f
10880 /* Value should be nonzero if functions must have frame pointers.
10881 Zero means the frame pointer need not be set up (and parms may
10882 be accessed via the stack pointer) in functions that seem suitable. */
10883
10884 static bool
10885 ix86_frame_pointer_required (void)
10886 {
10887 /* If we accessed previous frames, then the generated code expects
10888 to be able to access the saved ebp value in our frame. */
10889 if (cfun->machine->accesses_prev_frame)
10890 return true;
10891
10892 /* Several x86 OSes need a frame pointer for other reasons,
10893 usually pertaining to setjmp. */
10894 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10895 return true;
10896
10897 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
10898 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10899 return true;
10900
10901 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
10902 stack allocation is 4GB. */
10903 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10904 return true;
10905
10906 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10907 turns off the frame pointer by default. Turn it back on now if
10908 we've not got a leaf function. */
10909 if (TARGET_OMIT_LEAF_FRAME_POINTER
10910 && (!crtl->is_leaf
10911 || ix86_current_function_calls_tls_descriptor))
10912 return true;
10913
10914 if (crtl->profile && !flag_fentry)
10915 return true;
10916
10917 return false;
10918 }
10919
10920 /* Record that the current function accesses previous call frames. */
10921
10922 void
10923 ix86_setup_frame_addresses (void)
10924 {
10925 cfun->machine->accesses_prev_frame = 1;
10926 }
10927 \f
10928 #ifndef USE_HIDDEN_LINKONCE
10929 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10930 # define USE_HIDDEN_LINKONCE 1
10931 # else
10932 # define USE_HIDDEN_LINKONCE 0
10933 # endif
10934 #endif
10935
10936 static int pic_labels_used;
10937
10938 /* Fills in the label name that should be used for a pc thunk for
10939 the given register. */
10940
10941 static void
10942 get_pc_thunk_name (char name[32], unsigned int regno)
10943 {
10944 gcc_assert (!TARGET_64BIT);
10945
10946 if (USE_HIDDEN_LINKONCE)
10947 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10948 else
10949 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10950 }
10951
10952
10953 /* This function generates the -fpic pc thunks that load the chosen
10954 register with the return address of the caller and then return. */
10955
10956 static void
10957 ix86_code_end (void)
10958 {
10959 rtx xops[2];
10960 int regno;
10961
10962 for (regno = AX_REG; regno <= SP_REG; regno++)
10963 {
10964 char name[32];
10965 tree decl;
10966
10967 if (!(pic_labels_used & (1 << regno)))
10968 continue;
10969
10970 get_pc_thunk_name (name, regno);
10971
10972 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10973 get_identifier (name),
10974 build_function_type_list (void_type_node, NULL_TREE));
10975 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10976 NULL_TREE, void_type_node);
10977 TREE_PUBLIC (decl) = 1;
10978 TREE_STATIC (decl) = 1;
10979 DECL_IGNORED_P (decl) = 1;
10980
10981 #if TARGET_MACHO
10982 if (TARGET_MACHO)
10983 {
10984 switch_to_section (darwin_sections[text_coal_section]);
10985 fputs ("\t.weak_definition\t", asm_out_file);
10986 assemble_name (asm_out_file, name);
10987 fputs ("\n\t.private_extern\t", asm_out_file);
10988 assemble_name (asm_out_file, name);
10989 putc ('\n', asm_out_file);
10990 ASM_OUTPUT_LABEL (asm_out_file, name);
10991 DECL_WEAK (decl) = 1;
10992 }
10993 else
10994 #endif
10995 if (USE_HIDDEN_LINKONCE)
10996 {
10997 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10998
10999 targetm.asm_out.unique_section (decl, 0);
11000 switch_to_section (get_named_section (decl, NULL, 0));
11001
11002 targetm.asm_out.globalize_label (asm_out_file, name);
11003 fputs ("\t.hidden\t", asm_out_file);
11004 assemble_name (asm_out_file, name);
11005 putc ('\n', asm_out_file);
11006 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11007 }
11008 else
11009 {
11010 switch_to_section (text_section);
11011 ASM_OUTPUT_LABEL (asm_out_file, name);
11012 }
11013
11014 DECL_INITIAL (decl) = make_node (BLOCK);
11015 current_function_decl = decl;
11016 allocate_struct_function (decl, false);
11017 init_function_start (decl);
11018 first_function_block_is_cold = false;
11019 /* Make sure unwind info is emitted for the thunk if needed. */
11020 final_start_function (emit_barrier (), asm_out_file, 1);
11021
11022 /* Pad stack IP move with 4 instructions (two NOPs count
11023 as one instruction). */
11024 if (TARGET_PAD_SHORT_FUNCTION)
11025 {
11026 int i = 8;
11027
11028 while (i--)
11029 fputs ("\tnop\n", asm_out_file);
11030 }
11031
11032 xops[0] = gen_rtx_REG (Pmode, regno);
11033 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11034 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11035 output_asm_insn ("%!ret", NULL);
11036 final_end_function ();
11037 init_insn_lengths ();
11038 free_after_compilation (cfun);
11039 set_cfun (NULL);
11040 current_function_decl = NULL;
11041 }
11042
11043 if (flag_split_stack)
11044 file_end_indicate_split_stack ();
11045 }
11046
11047 /* Emit code for the SET_GOT patterns. */
11048
11049 const char *
11050 output_set_got (rtx dest, rtx label)
11051 {
11052 rtx xops[3];
11053
11054 xops[0] = dest;
11055
11056 if (TARGET_VXWORKS_RTP && flag_pic)
11057 {
11058 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
11059 xops[2] = gen_rtx_MEM (Pmode,
11060 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11061 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11062
11063 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11064 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11065 an unadorned address. */
11066 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11067 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11068 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11069 return "";
11070 }
11071
11072 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11073
11074 if (!flag_pic)
11075 {
11076 if (TARGET_MACHO)
11077 /* We don't need a pic base, we're not producing pic. */
11078 gcc_unreachable ();
11079
11080 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11081 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11082 targetm.asm_out.internal_label (asm_out_file, "L",
11083 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
11084 }
11085 else
11086 {
11087 char name[32];
11088 get_pc_thunk_name (name, REGNO (dest));
11089 pic_labels_used |= 1 << REGNO (dest);
11090
11091 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11092 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11093 output_asm_insn ("%!call\t%X2", xops);
11094
11095 #if TARGET_MACHO
11096 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11097 This is what will be referenced by the Mach-O PIC subsystem. */
11098 if (machopic_should_output_picbase_label () || !label)
11099 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11100
11101 /* When we are restoring the pic base at the site of a nonlocal label,
11102 and we decided to emit the pic base above, we will still output a
11103 local label used for calculating the correction offset (even though
11104 the offset will be 0 in that case). */
11105 if (label)
11106 targetm.asm_out.internal_label (asm_out_file, "L",
11107 CODE_LABEL_NUMBER (label));
11108 #endif
11109 }
11110
11111 if (!TARGET_MACHO)
11112 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11113
11114 return "";
11115 }
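
/* For the common -fpic case on ia32 the code above typically prints
   (sketch only; the exact label depends on the chosen register):

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk emitted by ix86_code_end is simply
   "movl (%esp), %ebx; ret".  */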
11116
11117 /* Generate a "push" pattern for input ARG. */
11118
11119 static rtx
11120 gen_push (rtx arg)
11121 {
11122 struct machine_function *m = cfun->machine;
11123
11124 if (m->fs.cfa_reg == stack_pointer_rtx)
11125 m->fs.cfa_offset += UNITS_PER_WORD;
11126 m->fs.sp_offset += UNITS_PER_WORD;
11127
11128 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11129 arg = gen_rtx_REG (word_mode, REGNO (arg));
11130
11131 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11132 gen_rtx_PRE_DEC (Pmode,
11133 stack_pointer_rtx)),
11134 arg);
11135 }
11136
11137 /* Generate a "pop" pattern for input ARG. */
11138
11139 static rtx
11140 gen_pop (rtx arg)
11141 {
11142 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11143 arg = gen_rtx_REG (word_mode, REGNO (arg));
11144
11145 return gen_rtx_SET (arg,
11146 gen_rtx_MEM (word_mode,
11147 gen_rtx_POST_INC (Pmode,
11148 stack_pointer_rtx)));
11149 }
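
/* As a sketch, for ARG = %rax on x86-64 gen_push and gen_pop return

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ax))
	(set (reg:DI ax) (mem:DI (post_inc:DI (reg:DI sp))))

   i.e. plain push/pop patterns; the frame-state bookkeeping in gen_push
   only updates cfun->machine->fs, and the caller still has to emit the
   returned insn.  */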
11150
11151 /* Return the regno of an unused call-clobbered register if one is
11152 available for the entire function, otherwise INVALID_REGNUM. */
11153
11154 static unsigned int
11155 ix86_select_alt_pic_regnum (void)
11156 {
11157 if (ix86_use_pseudo_pic_reg ())
11158 return INVALID_REGNUM;
11159
11160 if (crtl->is_leaf
11161 && !crtl->profile
11162 && !ix86_current_function_calls_tls_descriptor)
11163 {
11164 int i, drap;
11165 /* Can't use the same register for both PIC and DRAP. */
11166 if (crtl->drap_reg)
11167 drap = REGNO (crtl->drap_reg);
11168 else
11169 drap = -1;
11170 for (i = 2; i >= 0; --i)
11171 if (i != drap && !df_regs_ever_live_p (i))
11172 return i;
11173 }
11174
11175 return INVALID_REGNUM;
11176 }
11177
11178 /* Return TRUE if we need to save REGNO. */
11179
11180 static bool
11181 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
11182 {
11183 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11184 && pic_offset_table_rtx)
11185 {
11186 if (ix86_use_pseudo_pic_reg ())
11187 {
11188 /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
11189 _mcount in the prologue. */
11190 if (!TARGET_64BIT && flag_pic && crtl->profile)
11191 return true;
11192 }
11193 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11194 || crtl->profile
11195 || crtl->calls_eh_return
11196 || crtl->uses_const_pool
11197 || cfun->has_nonlocal_label)
11198 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11199 }
11200
11201 if (crtl->calls_eh_return && maybe_eh_return)
11202 {
11203 unsigned i;
11204 for (i = 0; ; i++)
11205 {
11206 unsigned test = EH_RETURN_DATA_REGNO (i);
11207 if (test == INVALID_REGNUM)
11208 break;
11209 if (test == regno)
11210 return true;
11211 }
11212 }
11213
11214 if (crtl->drap_reg
11215 && regno == REGNO (crtl->drap_reg)
11216 && !cfun->machine->no_drap_save_restore)
11217 return true;
11218
11219 return (df_regs_ever_live_p (regno)
11220 && !call_used_regs[regno]
11221 && !fixed_regs[regno]
11222 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11223 }
11224
11225 /* Return the number of saved general purpose registers. */
11226
11227 static int
11228 ix86_nsaved_regs (void)
11229 {
11230 int nregs = 0;
11231 int regno;
11232
11233 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11234 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11235 nregs ++;
11236 return nregs;
11237 }
11238
11239 /* Return number of saved SSE registers. */
11240
11241 static int
11242 ix86_nsaved_sseregs (void)
11243 {
11244 int nregs = 0;
11245 int regno;
11246
11247 if (!TARGET_64BIT_MS_ABI)
11248 return 0;
11249 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11250 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11251 nregs ++;
11252 return nregs;
11253 }
11254
11255 /* Given FROM and TO register numbers, say whether this elimination is
11256 allowed. If stack alignment is needed, we can only replace argument
11257 pointer with hard frame pointer, or replace frame pointer with stack
11258 pointer. Otherwise, frame pointer elimination is automatically
11259 handled and all other eliminations are valid. */
11260
11261 static bool
11262 ix86_can_eliminate (const int from, const int to)
11263 {
11264 if (stack_realign_fp)
11265 return ((from == ARG_POINTER_REGNUM
11266 && to == HARD_FRAME_POINTER_REGNUM)
11267 || (from == FRAME_POINTER_REGNUM
11268 && to == STACK_POINTER_REGNUM));
11269 else
11270 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11271 }
11272
11273 /* Return the offset between two registers, one to be eliminated, and the other
11274 its replacement, at the start of a routine. */
11275
11276 HOST_WIDE_INT
11277 ix86_initial_elimination_offset (int from, int to)
11278 {
11279 struct ix86_frame frame;
11280 ix86_compute_frame_layout (&frame);
11281
11282 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11283 return frame.hard_frame_pointer_offset;
11284 else if (from == FRAME_POINTER_REGNUM
11285 && to == HARD_FRAME_POINTER_REGNUM)
11286 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11287 else
11288 {
11289 gcc_assert (to == STACK_POINTER_REGNUM);
11290
11291 if (from == ARG_POINTER_REGNUM)
11292 return frame.stack_pointer_offset;
11293
11294 gcc_assert (from == FRAME_POINTER_REGNUM);
11295 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11296 }
11297 }
11298
11299 /* In a dynamically-aligned function, we can't know the offset from
11300 stack pointer to frame pointer, so we must ensure that setjmp
11301 eliminates fp against the hard fp (%ebp) rather than trying to
11302 index from %esp up to the top of the frame across a gap that is
11303 of unknown (at compile-time) size. */
11304 static rtx
11305 ix86_builtin_setjmp_frame_value (void)
11306 {
11307 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11308 }
11309
11310 /* When using -fsplit-stack, the allocation routines set a field in
11311 the TCB to the bottom of the stack plus this much space, measured
11312 in bytes. */
11313
11314 #define SPLIT_STACK_AVAILABLE 256
11315
11316 /* Fill the ix86_frame structure describing the frame of the current function. */
11317
11318 static void
11319 ix86_compute_frame_layout (struct ix86_frame *frame)
11320 {
11321 unsigned HOST_WIDE_INT stack_alignment_needed;
11322 HOST_WIDE_INT offset;
11323 unsigned HOST_WIDE_INT preferred_alignment;
11324 HOST_WIDE_INT size = get_frame_size ();
11325 HOST_WIDE_INT to_allocate;
11326
11327 frame->nregs = ix86_nsaved_regs ();
11328 frame->nsseregs = ix86_nsaved_sseregs ();
11329
11330 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
11331 except for function prologues, leaf functions and when the default
11332 incoming stack boundary is overridden on the command line or via the
11333 force_align_arg_pointer attribute. */
11334 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11335 && (!crtl->is_leaf || cfun->calls_alloca != 0
11336 || ix86_current_function_calls_tls_descriptor
11337 || ix86_incoming_stack_boundary < 128))
11338 {
11339 crtl->preferred_stack_boundary = 128;
11340 crtl->stack_alignment_needed = 128;
11341 }
11342 /* preferred_stack_boundary is never updated for calls
11343 expanded from a TLS descriptor. Update it here. We don't update it at
11344 expand time because, according to the comments before
11345 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
11346 away. */
11347 else if (ix86_current_function_calls_tls_descriptor
11348 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
11349 {
11350 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
11351 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
11352 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
11353 }
11354
11355 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11356 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11357
11358 gcc_assert (!size || stack_alignment_needed);
11359 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11360 gcc_assert (preferred_alignment <= stack_alignment_needed);
11361
11362 /* For SEH we have to limit the amount of code movement into the prologue.
11363 At present we do this via a BLOCKAGE, at which point there's very little
11364 scheduling that can be done, which means that there's very little point
11365 in doing anything except PUSHs. */
11366 if (TARGET_SEH)
11367 cfun->machine->use_fast_prologue_epilogue = false;
11368
11369 /* During reload iteration the number of registers saved can change.
11370 Recompute the value as needed. Do not recompute when the number of
11371 registers didn't change, as reload makes multiple calls to the function
11372 and does not expect the decision to change within a single iteration. */
11373 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
11374 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
11375 {
11376 int count = frame->nregs;
11377 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11378
11379 cfun->machine->use_fast_prologue_epilogue_nregs = count;
11380
11381 /* The fast prologue uses move instead of push to save registers. This
11382 is significantly longer, but also executes faster as modern hardware
11383 can execute the moves in parallel, but can't do that for push/pop.
11384
11385 Be careful about choosing what prologue to emit: when the function takes
11386 many instructions to execute we may use the slow version, as well as
11387 when the function is known to be outside a hot spot (this is known with
11388 feedback only). Weight the size of the function by the number of
11389 registers to save, as it is cheap to use one or two push instructions
11390 but very slow to use many of them. */
11391 if (count)
11392 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11393 if (node->frequency < NODE_FREQUENCY_NORMAL
11394 || (flag_branch_probabilities
11395 && node->frequency < NODE_FREQUENCY_HOT))
11396 cfun->machine->use_fast_prologue_epilogue = false;
11397 else
11398 cfun->machine->use_fast_prologue_epilogue
11399 = !expensive_function_p (count);
11400 }
11401
11402 frame->save_regs_using_mov
11403 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
11404 /* If static stack checking is enabled and done with probes,
11405 the registers need to be saved before allocating the frame. */
11406 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11407
11408 /* Skip return address. */
11409 offset = UNITS_PER_WORD;
11410
11411 /* Skip pushed static chain. */
11412 if (ix86_static_chain_on_stack)
11413 offset += UNITS_PER_WORD;
11414
11415 /* Skip saved base pointer. */
11416 if (frame_pointer_needed)
11417 offset += UNITS_PER_WORD;
11418 frame->hfp_save_offset = offset;
11419
11420 /* The traditional frame pointer location is at the top of the frame. */
11421 frame->hard_frame_pointer_offset = offset;
11422
11423 /* Register save area */
11424 offset += frame->nregs * UNITS_PER_WORD;
11425 frame->reg_save_offset = offset;
11426
11427 /* On SEH target, registers are pushed just before the frame pointer
11428 location. */
11429 if (TARGET_SEH)
11430 frame->hard_frame_pointer_offset = offset;
11431
11432 /* Align and set SSE register save area. */
11433 if (frame->nsseregs)
11434 {
11435 /* The only ABI that has saved SSE registers (Win64) also has a
11436 16-byte aligned default stack, and thus we don't need to be
11437 within the re-aligned local stack frame to save them. In case the
11438 incoming stack boundary is aligned to less than 16 bytes, an
11439 unaligned move of an SSE register will be emitted, so there is
11440 no point in rounding up the SSE register save area outside the
11441 re-aligned local stack frame to 16 bytes. */
11442 if (ix86_incoming_stack_boundary >= 128)
11443 offset = ROUND_UP (offset, 16);
11444 offset += frame->nsseregs * 16;
11445 }
11446 frame->sse_reg_save_offset = offset;
11447
11448 /* The re-aligned stack starts here. Values before this point are not
11449 directly comparable with values below this point. In order to make
11450 sure that no value happens to be the same before and after, force
11451 the alignment computation below to add a non-zero value. */
11452 if (stack_realign_fp)
11453 offset = ROUND_UP (offset, stack_alignment_needed);
11454
11455 /* Va-arg area */
11456 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11457 offset += frame->va_arg_size;
11458
11459 /* Align start of frame for local function. */
11460 if (stack_realign_fp
11461 || offset != frame->sse_reg_save_offset
11462 || size != 0
11463 || !crtl->is_leaf
11464 || cfun->calls_alloca
11465 || ix86_current_function_calls_tls_descriptor)
11466 offset = ROUND_UP (offset, stack_alignment_needed);
11467
11468 /* Frame pointer points here. */
11469 frame->frame_pointer_offset = offset;
11470
11471 offset += size;
11472
11473 /* Add the outgoing arguments area. It can be skipped if we eliminated
11474 all the function calls as dead code.
11475 Skipping is however impossible when the function calls alloca. The
11476 alloca expander assumes that the last crtl->outgoing_args_size bytes
11477 of the stack frame are unused. */
11478 if (ACCUMULATE_OUTGOING_ARGS
11479 && (!crtl->is_leaf || cfun->calls_alloca
11480 || ix86_current_function_calls_tls_descriptor))
11481 {
11482 offset += crtl->outgoing_args_size;
11483 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11484 }
11485 else
11486 frame->outgoing_arguments_size = 0;
11487
11488 /* Align stack boundary. Only needed if we're calling another function
11489 or using alloca. */
11490 if (!crtl->is_leaf || cfun->calls_alloca
11491 || ix86_current_function_calls_tls_descriptor)
11492 offset = ROUND_UP (offset, preferred_alignment);
11493
11494 /* We've reached end of stack frame. */
11495 frame->stack_pointer_offset = offset;
11496
11497 /* Size prologue needs to allocate. */
11498 to_allocate = offset - frame->sse_reg_save_offset;
11499
11500 if ((!to_allocate && frame->nregs <= 1)
11501 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
11502 frame->save_regs_using_mov = false;
11503
11504 if (ix86_using_red_zone ()
11505 && crtl->sp_is_unchanging
11506 && crtl->is_leaf
11507 && !ix86_current_function_calls_tls_descriptor)
11508 {
11509 frame->red_zone_size = to_allocate;
11510 if (frame->save_regs_using_mov)
11511 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11512 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11513 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11514 }
11515 else
11516 frame->red_zone_size = 0;
11517 frame->stack_pointer_offset -= frame->red_zone_size;
11518
11519 /* The SEH frame pointer location is near the bottom of the frame.
11520 This is enforced by the fact that the difference between the
11521 stack pointer and the frame pointer is limited to 240 bytes in
11522 the unwind data structure. */
11523 if (TARGET_SEH)
11524 {
11525 HOST_WIDE_INT diff;
11526
11527 /* If we can leave the frame pointer where it is, do so. This also
11528 returns the establisher frame for __builtin_frame_address (0). */
11529 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11530 if (diff <= SEH_MAX_FRAME_SIZE
11531 && (diff > 240 || (diff & 15) != 0)
11532 && !crtl->accesses_prior_frames)
11533 {
11534 /* Ideally we'd determine what portion of the local stack frame
11535 (within the constraint of the lowest 240) is most heavily used.
11536 But without that complication, simply bias the frame pointer
11537 by 128 bytes so as to maximize the amount of the local stack
11538 frame that is addressable with 8-bit offsets. */
11539 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11540 }
11541 }
11542 }
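
/* Rough picture of the layout computed above, from higher to lower
   addresses (several areas may be empty for a given function):

	return address			<- offset UNITS_PER_WORD
	[pushed static chain]
	[saved frame pointer]		<- hard_frame_pointer_offset
	saved general registers		<- reg_save_offset
	saved SSE registers (Win64)	<- sse_reg_save_offset
	va_arg register save area
	local variables			<- frame_pointer_offset
	outgoing arguments		<- stack_pointer_offset

   This is only an orientation aid; SEH, DRAP and the red zone adjust
   several of these offsets as described in the code above.  */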
11543
11544 /* This is semi-inlined memory_address_length, but simplified
11545 since we know that we're always dealing with reg+offset, and
11546 to avoid having to create and discard all that rtl. */
11547
11548 static inline int
11549 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11550 {
11551 int len = 4;
11552
11553 if (offset == 0)
11554 {
11555 /* EBP and R13 cannot be encoded without an offset. */
11556 len = (regno == BP_REG || regno == R13_REG);
11557 }
11558 else if (IN_RANGE (offset, -128, 127))
11559 len = 1;
11560
11561 /* ESP and R12 must be encoded with a SIB byte. */
11562 if (regno == SP_REG || regno == R12_REG)
11563 len++;
11564
11565 return len;
11566 }
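
/* A few illustrative encodings for the length computed above:
   0(%eax) -> 0 (no displacement byte needed), 0(%ebp) -> 1 (disp8
   required), 8(%esp) -> 2 (disp8 plus SIB byte), 1024(%eax) -> 4
   (disp32), 1024(%esp) -> 5 (disp32 plus SIB byte).  */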
11567
11568 /* Return an RTX that points to CFA_OFFSET within the stack frame.
11569 The valid base registers are taken from CFUN->MACHINE->FS. */
11570
11571 static rtx
11572 choose_baseaddr (HOST_WIDE_INT cfa_offset)
11573 {
11574 const struct machine_function *m = cfun->machine;
11575 rtx base_reg = NULL;
11576 HOST_WIDE_INT base_offset = 0;
11577
11578 if (m->use_fast_prologue_epilogue)
11579 {
11580 /* Choose the base register most likely to allow the most scheduling
11581 opportunities. Generally FP is valid throughout the function,
11582 while DRAP must be reloaded within the epilogue. But choose either
11583 over the SP due to increased encoding size. */
11584
11585 if (m->fs.fp_valid)
11586 {
11587 base_reg = hard_frame_pointer_rtx;
11588 base_offset = m->fs.fp_offset - cfa_offset;
11589 }
11590 else if (m->fs.drap_valid)
11591 {
11592 base_reg = crtl->drap_reg;
11593 base_offset = 0 - cfa_offset;
11594 }
11595 else if (m->fs.sp_valid)
11596 {
11597 base_reg = stack_pointer_rtx;
11598 base_offset = m->fs.sp_offset - cfa_offset;
11599 }
11600 }
11601 else
11602 {
11603 HOST_WIDE_INT toffset;
11604 int len = 16, tlen;
11605
11606 /* Choose the base register with the smallest address encoding.
11607 With a tie, choose FP > DRAP > SP. */
11608 if (m->fs.sp_valid)
11609 {
11610 base_reg = stack_pointer_rtx;
11611 base_offset = m->fs.sp_offset - cfa_offset;
11612 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11613 }
11614 if (m->fs.drap_valid)
11615 {
11616 toffset = 0 - cfa_offset;
11617 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11618 if (tlen <= len)
11619 {
11620 base_reg = crtl->drap_reg;
11621 base_offset = toffset;
11622 len = tlen;
11623 }
11624 }
11625 if (m->fs.fp_valid)
11626 {
11627 toffset = m->fs.fp_offset - cfa_offset;
11628 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11629 if (tlen <= len)
11630 {
11631 base_reg = hard_frame_pointer_rtx;
11632 base_offset = toffset;
11633 len = tlen;
11634 }
11635 }
11636 }
11637 gcc_assert (base_reg != NULL);
11638
11639 return plus_constant (Pmode, base_reg, base_offset);
11640 }
11641
11642 /* Emit code to save registers in the prologue. */
11643
11644 static void
11645 ix86_emit_save_regs (void)
11646 {
11647 unsigned int regno;
11648 rtx_insn *insn;
11649
11650 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11651 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11652 {
11653 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11654 RTX_FRAME_RELATED_P (insn) = 1;
11655 }
11656 }
11657
11658 /* Emit a single register save at CFA - CFA_OFFSET. */
11659
11660 static void
11661 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11662 HOST_WIDE_INT cfa_offset)
11663 {
11664 struct machine_function *m = cfun->machine;
11665 rtx reg = gen_rtx_REG (mode, regno);
11666 rtx unspec = NULL_RTX;
11667 rtx mem, addr, base, insn;
11668 unsigned int align;
11669
11670 addr = choose_baseaddr (cfa_offset);
11671 mem = gen_frame_mem (mode, addr);
11672
11673 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
11674 align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
11675 set_mem_align (mem, align);
11676
11677 /* SSE saves are not within the re-aligned local stack frame.
11678 In case INCOMING_STACK_BOUNDARY is misaligned, we have
11679 to emit an unaligned store. */
11680 if (mode == V4SFmode && align < 128)
11681 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
11682
11683 insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg));
11684 RTX_FRAME_RELATED_P (insn) = 1;
11685
11686 base = addr;
11687 if (GET_CODE (base) == PLUS)
11688 base = XEXP (base, 0);
11689 gcc_checking_assert (REG_P (base));
11690
11691 /* When saving registers into a re-aligned local stack frame, avoid
11692 any tricky guessing by dwarf2out. */
11693 if (m->fs.realigned)
11694 {
11695 gcc_checking_assert (stack_realign_drap);
11696
11697 if (regno == REGNO (crtl->drap_reg))
11698 {
11699 /* A bit of a hack. We force the DRAP register to be saved in
11700 the re-aligned stack frame, which provides us with a copy
11701 of the CFA that will last past the prologue. Install it. */
11702 gcc_checking_assert (cfun->machine->fs.fp_valid);
11703 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11704 cfun->machine->fs.fp_offset - cfa_offset);
11705 mem = gen_rtx_MEM (mode, addr);
11706 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11707 }
11708 else
11709 {
11710 /* The frame pointer is a stable reference within the
11711 aligned frame. Use it. */
11712 gcc_checking_assert (cfun->machine->fs.fp_valid);
11713 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11714 cfun->machine->fs.fp_offset - cfa_offset);
11715 mem = gen_rtx_MEM (mode, addr);
11716 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11717 }
11718 }
11719
11720 /* The memory may not be relative to the current CFA register,
11721 which means that we may need to generate a new pattern for
11722 use by the unwind info. */
11723 else if (base != m->fs.cfa_reg)
11724 {
11725 addr = plus_constant (Pmode, m->fs.cfa_reg,
11726 m->fs.cfa_offset - cfa_offset);
11727 mem = gen_rtx_MEM (mode, addr);
11728 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11729 }
11730 else if (unspec)
11731 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11732 }
11733
11734 /* Emit code to save registers using MOV insns.
11735 First register is stored at CFA - CFA_OFFSET. */
11736 static void
11737 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11738 {
11739 unsigned int regno;
11740
11741 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11742 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11743 {
11744 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11745 cfa_offset -= UNITS_PER_WORD;
11746 }
11747 }
11748
11749 /* Emit code to save SSE registers using MOV insns.
11750 First register is stored at CFA - CFA_OFFSET. */
11751 static void
11752 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11753 {
11754 unsigned int regno;
11755
11756 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11757 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11758 {
11759 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11760 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11761 }
11762 }
11763
11764 static GTY(()) rtx queued_cfa_restores;
11765
11766 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
11767 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11768 Don't add the note if the previously saved value will be left untouched
11769 within the stack red zone till return, as unwinders can find the same value
11770 in the register and on the stack. */
11771
11772 static void
11773 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11774 {
11775 if (!crtl->shrink_wrapped
11776 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11777 return;
11778
11779 if (insn)
11780 {
11781 add_reg_note (insn, REG_CFA_RESTORE, reg);
11782 RTX_FRAME_RELATED_P (insn) = 1;
11783 }
11784 else
11785 queued_cfa_restores
11786 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11787 }
11788
11789 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
11790
11791 static void
11792 ix86_add_queued_cfa_restore_notes (rtx insn)
11793 {
11794 rtx last;
11795 if (!queued_cfa_restores)
11796 return;
11797 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11798 ;
11799 XEXP (last, 1) = REG_NOTES (insn);
11800 REG_NOTES (insn) = queued_cfa_restores;
11801 queued_cfa_restores = NULL_RTX;
11802 RTX_FRAME_RELATED_P (insn) = 1;
11803 }
11804
11805 /* Expand prologue or epilogue stack adjustment.
11806 The pattern exists to put a dependency on all ebp-based memory accesses.
11807 STYLE should be negative if instructions should be marked as frame related,
11808 zero if the %r11 register is live and cannot be freely used, and positive
11809 otherwise. */
11810
11811 static void
11812 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11813 int style, bool set_cfa)
11814 {
11815 struct machine_function *m = cfun->machine;
11816 rtx insn;
11817 bool add_frame_related_expr = false;
11818
11819 if (Pmode == SImode)
11820 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11821 else if (x86_64_immediate_operand (offset, DImode))
11822 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
11823 else
11824 {
11825 rtx tmp;
11826 /* r11 is used by indirect sibcall return as well, set before the
11827 epilogue and used after the epilogue. */
11828 if (style)
11829 tmp = gen_rtx_REG (DImode, R11_REG);
11830 else
11831 {
11832 gcc_assert (src != hard_frame_pointer_rtx
11833 && dest != hard_frame_pointer_rtx);
11834 tmp = hard_frame_pointer_rtx;
11835 }
11836 insn = emit_insn (gen_rtx_SET (tmp, offset));
11837 if (style < 0)
11838 add_frame_related_expr = true;
11839
11840 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11841 }
11842
11843 insn = emit_insn (insn);
11844 if (style >= 0)
11845 ix86_add_queued_cfa_restore_notes (insn);
11846
11847 if (set_cfa)
11848 {
11849 rtx r;
11850
11851 gcc_assert (m->fs.cfa_reg == src);
11852 m->fs.cfa_offset += INTVAL (offset);
11853 m->fs.cfa_reg = dest;
11854
11855 r = gen_rtx_PLUS (Pmode, src, offset);
11856 r = gen_rtx_SET (dest, r);
11857 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11858 RTX_FRAME_RELATED_P (insn) = 1;
11859 }
11860 else if (style < 0)
11861 {
11862 RTX_FRAME_RELATED_P (insn) = 1;
11863 if (add_frame_related_expr)
11864 {
11865 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11866 r = gen_rtx_SET (dest, r);
11867 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
11868 }
11869 }
11870
11871 if (dest == stack_pointer_rtx)
11872 {
11873 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11874 bool valid = m->fs.sp_valid;
11875
11876 if (src == hard_frame_pointer_rtx)
11877 {
11878 valid = m->fs.fp_valid;
11879 ooffset = m->fs.fp_offset;
11880 }
11881 else if (src == crtl->drap_reg)
11882 {
11883 valid = m->fs.drap_valid;
11884 ooffset = 0;
11885 }
11886 else
11887 {
11888 /* Else there are two possibilities: SP itself, which we set
11889 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
11890 taken care of by hand along the eh_return path. */
11891 gcc_checking_assert (src == stack_pointer_rtx
11892 || offset == const0_rtx);
11893 }
11894
11895 m->fs.sp_offset = ooffset - INTVAL (offset);
11896 m->fs.sp_valid = valid;
11897 }
11898 }
11899
11900 /* Find an available register to be used as dynamic realign argument
11901 pointer register. Such a register will be written in the prologue and
11902 used at the beginning of the body, so it must not be
11903 1. a parameter passing register.
11904 2. the GOT pointer.
11905 We reuse the static-chain register if it is available. Otherwise, we
11906 use DI for i386 and R13 for x86-64; R13 was chosen for its
11907 shorter encoding.
11908 
11909 Return: the regno of the chosen register. */
11910
11911 static unsigned int
11912 find_drap_reg (void)
11913 {
11914 tree decl = cfun->decl;
11915
11916 if (TARGET_64BIT)
11917 {
11918 /* Use R13 for a nested function or a function that needs a static chain.
11919 Since a function with a tail call may use any caller-saved
11920 registers in the epilogue, DRAP must not use a caller-saved
11921 register in that case. */
11922 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11923 return R13_REG;
11924
11925 return R10_REG;
11926 }
11927 else
11928 {
11929 /* Use DI for a nested function or a function that needs a static chain.
11930 Since a function with a tail call may use any caller-saved
11931 registers in the epilogue, DRAP must not use a caller-saved
11932 register in that case. */
11933 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11934 return DI_REG;
11935
11936 /* Reuse static chain register if it isn't used for parameter
11937 passing. */
11938 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11939 {
11940 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11941 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11942 return CX_REG;
11943 }
11944 return DI_REG;
11945 }
11946 }
11947
11948 /* Handle a "force_align_arg_pointer" attribute. */
11949
11950 static tree
11951 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11952 tree, int, bool *no_add_attrs)
11953 {
11954 if (TREE_CODE (*node) != FUNCTION_TYPE
11955 && TREE_CODE (*node) != METHOD_TYPE
11956 && TREE_CODE (*node) != FIELD_DECL
11957 && TREE_CODE (*node) != TYPE_DECL)
11958 {
11959 warning (OPT_Wattributes, "%qE attribute only applies to functions",
11960 name);
11961 *no_add_attrs = true;
11962 }
11963
11964 return NULL_TREE;
11965 }
11966
11967 /* Return minimum incoming stack alignment. */
11968
11969 static unsigned int
11970 ix86_minimum_incoming_stack_boundary (bool sibcall)
11971 {
11972 unsigned int incoming_stack_boundary;
11973
11974 /* Prefer the one specified at command line. */
11975 if (ix86_user_incoming_stack_boundary)
11976 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11977 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
11978 when -mstackrealign is used, this is not the sibcall check, and the
11979 estimated stack alignment is 128 bits. */
11980 else if (!sibcall
11981 && ix86_force_align_arg_pointer
11982 && crtl->stack_alignment_estimated == 128)
11983 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11984 else
11985 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11986
11987 /* Incoming stack alignment can be changed on individual functions
11988 via force_align_arg_pointer attribute. We use the smallest
11989 incoming stack boundary. */
11990 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
11991 && lookup_attribute (ix86_force_align_arg_pointer_string,
11992 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
11993 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11994
11995 /* The incoming stack frame has to be aligned at least at
11996 parm_stack_boundary. */
11997 if (incoming_stack_boundary < crtl->parm_stack_boundary)
11998 incoming_stack_boundary = crtl->parm_stack_boundary;
11999
12000 /* The stack at the entry of main is aligned by the runtime. We use the
12001 smallest incoming stack boundary. */
12002 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12003 && DECL_NAME (current_function_decl)
12004 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12005 && DECL_FILE_SCOPE_P (current_function_decl))
12006 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12007
12008 return incoming_stack_boundary;
12009 }
12010
12011 /* Update incoming stack boundary and estimated stack alignment. */
12012
12013 static void
12014 ix86_update_stack_boundary (void)
12015 {
12016 ix86_incoming_stack_boundary
12017 = ix86_minimum_incoming_stack_boundary (false);
12018
12019 /* An x86_64 vararg function needs 16-byte stack alignment for the register save
12020 area. */
12021 if (TARGET_64BIT
12022 && cfun->stdarg
12023 && crtl->stack_alignment_estimated < 128)
12024 crtl->stack_alignment_estimated = 128;
12025 }
12026
12027 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12028 needed or an rtx for DRAP otherwise. */
12029
12030 static rtx
12031 ix86_get_drap_rtx (void)
12032 {
12033 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
12034 crtl->need_drap = true;
12035
12036 if (stack_realign_drap)
12037 {
12038 /* Assign DRAP to vDRAP and return vDRAP. */
12039 unsigned int regno = find_drap_reg ();
12040 rtx drap_vreg;
12041 rtx arg_ptr;
12042 rtx_insn *seq, *insn;
12043
12044 arg_ptr = gen_rtx_REG (Pmode, regno);
12045 crtl->drap_reg = arg_ptr;
12046
12047 start_sequence ();
12048 drap_vreg = copy_to_reg (arg_ptr);
12049 seq = get_insns ();
12050 end_sequence ();
12051
12052 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12053 if (!optimize)
12054 {
12055 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12056 RTX_FRAME_RELATED_P (insn) = 1;
12057 }
12058 return drap_vreg;
12059 }
12060 else
12061 return NULL;
12062 }
12063
12064 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12065
12066 static rtx
12067 ix86_internal_arg_pointer (void)
12068 {
12069 return virtual_incoming_args_rtx;
12070 }
12071
12072 struct scratch_reg {
12073 rtx reg;
12074 bool saved;
12075 };
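/* The typical usage pattern, as in ix86_adjust_stack_and_probe and
   ix86_emit_probe_stack_range below, is:

	struct scratch_reg sr;
	get_scratch_register_on_entry (&sr);
	... emit insns that use sr.reg ...
	release_scratch_register_on_entry (&sr);

   If no suitable register is free, SR.SAVED is set and the chosen
   register is pushed before and popped after its use.  */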
12076
12077 /* Return a short-lived scratch register for use on function entry.
12078 In 32-bit mode, it is valid only after the registers are saved
12079 in the prologue. This register must be released by means of
12080 release_scratch_register_on_entry once it is dead. */
12081
12082 static void
12083 get_scratch_register_on_entry (struct scratch_reg *sr)
12084 {
12085 int regno;
12086
12087 sr->saved = false;
12088
12089 if (TARGET_64BIT)
12090 {
12091 /* We always use R11 in 64-bit mode. */
12092 regno = R11_REG;
12093 }
12094 else
12095 {
12096 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12097 bool fastcall_p
12098 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12099 bool thiscall_p
12100 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12101 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12102 int regparm = ix86_function_regparm (fntype, decl);
12103 int drap_regno
12104 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12105
12106 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12107 for the static chain register. */
12108 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12109 && drap_regno != AX_REG)
12110 regno = AX_REG;
12111 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12112 for the static chain register. */
12113 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12114 regno = AX_REG;
12115 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12116 regno = DX_REG;
12117 /* ecx is the static chain register. */
12118 else if (regparm < 3 && !fastcall_p && !thiscall_p
12119 && !static_chain_p
12120 && drap_regno != CX_REG)
12121 regno = CX_REG;
12122 else if (ix86_save_reg (BX_REG, true))
12123 regno = BX_REG;
12124 /* esi is the static chain register. */
12125 else if (!(regparm == 3 && static_chain_p)
12126 && ix86_save_reg (SI_REG, true))
12127 regno = SI_REG;
12128 else if (ix86_save_reg (DI_REG, true))
12129 regno = DI_REG;
12130 else
12131 {
12132 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12133 sr->saved = true;
12134 }
12135 }
12136
12137 sr->reg = gen_rtx_REG (Pmode, regno);
12138 if (sr->saved)
12139 {
12140 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12141 RTX_FRAME_RELATED_P (insn) = 1;
12142 }
12143 }
12144
12145 /* Release a scratch register obtained from the preceding function. */
12146
12147 static void
12148 release_scratch_register_on_entry (struct scratch_reg *sr)
12149 {
12150 if (sr->saved)
12151 {
12152 struct machine_function *m = cfun->machine;
12153 rtx x, insn = emit_insn (gen_pop (sr->reg));
12154
12155 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12156 RTX_FRAME_RELATED_P (insn) = 1;
12157 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12158 x = gen_rtx_SET (stack_pointer_rtx, x);
12159 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12160 m->fs.sp_offset -= UNITS_PER_WORD;
12161 }
12162 }
12163
12164 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
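/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 (see defaults.h),
   PROBE_INTERVAL is 4096 bytes, i.e. roughly one probe per page on
   typical configurations.  */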
12165
12166 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12167
12168 static void
12169 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12170 {
12171 /* We skip the probe for the first interval + a small dope of 4 words and
12172 probe that many bytes past the specified size to maintain a protection
12173 area at the bottom of the stack. */
12174 const int dope = 4 * UNITS_PER_WORD;
12175 rtx size_rtx = GEN_INT (size), last;
12176
12177 /* See if we have a constant small number of probes to generate. If so,
12178 that's the easy case. The run-time loop is made up of 9 insns in the
12179 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12180 for n # of intervals. */
12181 if (size <= 4 * PROBE_INTERVAL)
12182 {
12183 HOST_WIDE_INT i, adjust;
12184 bool first_probe = true;
12185
12186 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12187 values of N from 1 until it exceeds SIZE. If only one probe is
12188 needed, this will not generate any code. Then adjust and probe
12189 to PROBE_INTERVAL + SIZE. */
12190 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12191 {
12192 if (first_probe)
12193 {
12194 adjust = 2 * PROBE_INTERVAL + dope;
12195 first_probe = false;
12196 }
12197 else
12198 adjust = PROBE_INTERVAL;
12199
12200 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12201 plus_constant (Pmode, stack_pointer_rtx,
12202 -adjust)));
12203 emit_stack_probe (stack_pointer_rtx);
12204 }
12205
12206 if (first_probe)
12207 adjust = size + PROBE_INTERVAL + dope;
12208 else
12209 adjust = size + PROBE_INTERVAL - i;
12210
12211 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12212 plus_constant (Pmode, stack_pointer_rtx,
12213 -adjust)));
12214 emit_stack_probe (stack_pointer_rtx);
12215
12216 /* Adjust back to account for the additional first interval. */
12217 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12218 plus_constant (Pmode, stack_pointer_rtx,
12219 PROBE_INTERVAL + dope)));
12220 }
12221
12222 /* Otherwise, do the same as above, but in a loop. Note that we must be
12223 extra careful with variables wrapping around because we might be at
12224 the very top (or the very bottom) of the address space and we have
12225 to be able to handle this case properly; in particular, we use an
12226 equality test for the loop condition. */
12227 else
12228 {
12229 HOST_WIDE_INT rounded_size;
12230 struct scratch_reg sr;
12231
12232 get_scratch_register_on_entry (&sr);
12233
12234
12235 /* Step 1: round SIZE to the previous multiple of the interval. */
12236
12237 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12238
12239
12240 /* Step 2: compute initial and final value of the loop counter. */
12241
12242 /* SP = SP_0 + PROBE_INTERVAL. */
12243 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12244 plus_constant (Pmode, stack_pointer_rtx,
12245 - (PROBE_INTERVAL + dope))));
12246
12247 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
12248 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12249 emit_insn (gen_rtx_SET (sr.reg,
12250 plus_constant (Pmode, stack_pointer_rtx,
12251 -rounded_size)));
12252 else
12253 {
12254 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12255 emit_insn (gen_rtx_SET (sr.reg,
12256 gen_rtx_PLUS (Pmode, sr.reg,
12257 stack_pointer_rtx)));
12258 }
12259
12260
12261 /* Step 3: the loop
12262
12263 do
12264 {
12265 SP = SP + PROBE_INTERVAL
12266 probe at SP
12267 }
12268 while (SP != LAST_ADDR)
12269
12270 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12271 values of N from 1 until it is equal to ROUNDED_SIZE. */
12272
12273 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12274
12275
12276 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12277 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12278
12279 if (size != rounded_size)
12280 {
12281 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12282 plus_constant (Pmode, stack_pointer_rtx,
12283 rounded_size - size)));
12284 emit_stack_probe (stack_pointer_rtx);
12285 }
12286
12287 /* Adjust back to account for the additional first interval. */
12288 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12289 plus_constant (Pmode, stack_pointer_rtx,
12290 PROBE_INTERVAL + dope)));
12291
12292 release_scratch_register_on_entry (&sr);
12293 }
12294
12295 /* Even if the stack pointer isn't the CFA register, we need to correctly
12296 describe the adjustments made to it, in particular differentiate the
12297 frame-related ones from the frame-unrelated ones. */
12298 if (size > 0)
12299 {
12300 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12301 XVECEXP (expr, 0, 0)
12302 = gen_rtx_SET (stack_pointer_rtx,
12303 plus_constant (Pmode, stack_pointer_rtx, -size));
12304 XVECEXP (expr, 0, 1)
12305 = gen_rtx_SET (stack_pointer_rtx,
12306 plus_constant (Pmode, stack_pointer_rtx,
12307 PROBE_INTERVAL + dope + size));
12308 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12309 RTX_FRAME_RELATED_P (last) = 1;
12310
12311 cfun->machine->fs.sp_offset += size;
12312 }
12313
12314 /* Make sure nothing is scheduled before we are done. */
12315 emit_insn (gen_blockage ());
12316 }
12317
12318 /* Adjust the stack pointer up to REG while probing it. */
12319
12320 const char *
12321 output_adjust_stack_and_probe (rtx reg)
12322 {
12323 static int labelno = 0;
12324 char loop_lab[32];
12325 rtx xops[2];
12326
12327 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12328
12329 /* Loop. */
12330 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12331
12332 /* SP = SP + PROBE_INTERVAL. */
12333 xops[0] = stack_pointer_rtx;
12334 xops[1] = GEN_INT (PROBE_INTERVAL);
12335 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12336
12337 /* Probe at SP. */
12338 xops[1] = const0_rtx;
12339 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12340
12341 /* Test if SP == LAST_ADDR. */
12342 xops[0] = stack_pointer_rtx;
12343 xops[1] = reg;
12344 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12345
12346 /* Branch. */
12347 fputs ("\tjne\t", asm_out_file);
12348 assemble_name_raw (asm_out_file, loop_lab);
12349 fputc ('\n', asm_out_file);
12350
12351 return "";
12352 }
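/* For illustration only: on x86-64, with the default 4096-byte interval
   and the R11 scratch register, the loop emitted above looks roughly
   like

	.LPSRL0:
	subq	$4096, %rsp
	orq	$0, (%rsp)
	cmpq	%r11, %rsp
	jne	.LPSRL0

   The exact operand size and scratch register depend on the target and
   on the register chosen by get_scratch_register_on_entry.  */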
12353
12354 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12355 inclusive. These are offsets from the current stack pointer. */
12356
12357 static void
12358 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12359 {
12360 /* See if we have a constant small number of probes to generate. If so,
12361 that's the easy case. The run-time loop is made up of 6 insns in the
12362 generic case while the compile-time loop is made up of n insns for n #
12363 of intervals. */
12364 if (size <= 6 * PROBE_INTERVAL)
12365 {
12366 HOST_WIDE_INT i;
12367
12368 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12369 it exceeds SIZE. If only one probe is needed, this will not
12370 generate any code. Then probe at FIRST + SIZE. */
12371 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12372 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12373 -(first + i)));
12374
12375 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12376 -(first + size)));
12377 }
12378
12379 /* Otherwise, do the same as above, but in a loop. Note that we must be
12380 extra careful with variables wrapping around because we might be at
12381 the very top (or the very bottom) of the address space and we have
12382 to be able to handle this case properly; in particular, we use an
12383 equality test for the loop condition. */
12384 else
12385 {
12386 HOST_WIDE_INT rounded_size, last;
12387 struct scratch_reg sr;
12388
12389 get_scratch_register_on_entry (&sr);
12390
12391
12392 /* Step 1: round SIZE to the previous multiple of the interval. */
12393
12394 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12395
12396
12397 /* Step 2: compute initial and final value of the loop counter. */
12398
12399 /* TEST_OFFSET = FIRST. */
12400 emit_move_insn (sr.reg, GEN_INT (-first));
12401
12402 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12403 last = first + rounded_size;
12404
12405
12406 /* Step 3: the loop
12407
12408 do
12409 {
12410 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12411 probe at TEST_ADDR
12412 }
12413 while (TEST_ADDR != LAST_ADDR)
12414
12415 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12416 until it is equal to ROUNDED_SIZE. */
12417
12418 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12419
12420
12421 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12422 that SIZE is equal to ROUNDED_SIZE. */
12423
12424 if (size != rounded_size)
12425 emit_stack_probe (plus_constant (Pmode,
12426 gen_rtx_PLUS (Pmode,
12427 stack_pointer_rtx,
12428 sr.reg),
12429 rounded_size - size));
12430
12431 release_scratch_register_on_entry (&sr);
12432 }
12433
12434 /* Make sure nothing is scheduled before we are done. */
12435 emit_insn (gen_blockage ());
12436 }
12437
12438 /* Probe a range of stack addresses from REG to END, inclusive. These are
12439 offsets from the current stack pointer. */
12440
12441 const char *
12442 output_probe_stack_range (rtx reg, rtx end)
12443 {
12444 static int labelno = 0;
12445 char loop_lab[32];
12446 rtx xops[3];
12447
12448 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12449
12450 /* Loop. */
12451 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12452
12453 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12454 xops[0] = reg;
12455 xops[1] = GEN_INT (PROBE_INTERVAL);
12456 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12457
12458 /* Probe at TEST_ADDR. */
12459 xops[0] = stack_pointer_rtx;
12460 xops[1] = reg;
12461 xops[2] = const0_rtx;
12462 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12463
12464 /* Test if TEST_ADDR == LAST_ADDR. */
12465 xops[0] = reg;
12466 xops[1] = end;
12467 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12468
12469 /* Branch. */
12470 fputs ("\tjne\t", asm_out_file);
12471 assemble_name_raw (asm_out_file, loop_lab);
12472 fputc ('\n', asm_out_file);
12473
12474 return "";
12475 }
12476
12477 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
12478 to be generated in correct form. */
12479 static void
12480 ix86_finalize_stack_realign_flags (void)
12481 {
12482 /* Check if stack realignment is really needed after reload, and
12483 store the result in cfun. */
12484 unsigned int incoming_stack_boundary
12485 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12486 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12487 unsigned int stack_realign = (incoming_stack_boundary
12488 < (crtl->is_leaf
12489 ? crtl->max_used_stack_slot_alignment
12490 : crtl->stack_alignment_needed));
12491
12492 if (crtl->stack_realign_finalized)
12493 {
12494 /* After stack_realign_needed is finalized, we can no longer
12495 change it. */
12496 gcc_assert (crtl->stack_realign_needed == stack_realign);
12497 return;
12498 }
12499
12500 /* If the only reason for frame_pointer_needed is that we conservatively
12501 assumed stack realignment might be needed, but in the end nothing that
12502 needed the stack alignment had been spilled, clear frame_pointer_needed
12503 and say we don't need stack realignment. */
12504 if (stack_realign
12505 && frame_pointer_needed
12506 && crtl->is_leaf
12507 && flag_omit_frame_pointer
12508 && crtl->sp_is_unchanging
12509 && !ix86_current_function_calls_tls_descriptor
12510 && !crtl->accesses_prior_frames
12511 && !cfun->calls_alloca
12512 && !crtl->calls_eh_return
12513 /* See ira_setup_eliminable_regset for the rationale. */
12514 && !(STACK_CHECK_MOVING_SP
12515 && flag_stack_check
12516 && flag_exceptions
12517 && cfun->can_throw_non_call_exceptions)
12518 && !ix86_frame_pointer_required ()
12519 && get_frame_size () == 0
12520 && ix86_nsaved_sseregs () == 0
12521 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12522 {
12523 HARD_REG_SET set_up_by_prologue, prologue_used;
12524 basic_block bb;
12525
12526 CLEAR_HARD_REG_SET (prologue_used);
12527 CLEAR_HARD_REG_SET (set_up_by_prologue);
12528 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12529 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12530 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12531 HARD_FRAME_POINTER_REGNUM);
12532 FOR_EACH_BB_FN (bb, cfun)
12533 {
12534 rtx_insn *insn;
12535 FOR_BB_INSNS (bb, insn)
12536 if (NONDEBUG_INSN_P (insn)
12537 && requires_stack_frame_p (insn, prologue_used,
12538 set_up_by_prologue))
12539 {
12540 crtl->stack_realign_needed = stack_realign;
12541 crtl->stack_realign_finalized = true;
12542 return;
12543 }
12544 }
12545
12546 /* If drap has been set, but it actually isn't live at the start
12547 of the function, there is no reason to set it up. */
12548 if (crtl->drap_reg)
12549 {
12550 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12551 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
12552 {
12553 crtl->drap_reg = NULL_RTX;
12554 crtl->need_drap = false;
12555 }
12556 }
12557 else
12558 cfun->machine->no_drap_save_restore = true;
12559
12560 frame_pointer_needed = false;
12561 stack_realign = false;
12562 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12563 crtl->stack_alignment_needed = incoming_stack_boundary;
12564 crtl->stack_alignment_estimated = incoming_stack_boundary;
12565 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12566 crtl->preferred_stack_boundary = incoming_stack_boundary;
12567 df_finish_pass (true);
12568 df_scan_alloc (NULL);
12569 df_scan_blocks ();
12570 df_compute_regs_ever_live (true);
12571 df_analyze ();
12572 }
12573
12574 crtl->stack_realign_needed = stack_realign;
12575 crtl->stack_realign_finalized = true;
12576 }
12577
12578 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12579
12580 static void
12581 ix86_elim_entry_set_got (rtx reg)
12582 {
12583 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12584 rtx_insn *c_insn = BB_HEAD (bb);
12585 if (!NONDEBUG_INSN_P (c_insn))
12586 c_insn = next_nonnote_nondebug_insn (c_insn);
12587 if (c_insn && NONJUMP_INSN_P (c_insn))
12588 {
12589 rtx pat = PATTERN (c_insn);
12590 if (GET_CODE (pat) == PARALLEL)
12591 {
12592 rtx vec = XVECEXP (pat, 0, 0);
12593 if (GET_CODE (vec) == SET
12594 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12595 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12596 delete_insn (c_insn);
12597 }
12598 }
12599 }
12600
12601 /* Expand the prologue into a bunch of separate insns. */
12602
12603 void
12604 ix86_expand_prologue (void)
12605 {
12606 struct machine_function *m = cfun->machine;
12607 rtx insn, t;
12608 struct ix86_frame frame;
12609 HOST_WIDE_INT allocate;
12610 bool int_registers_saved;
12611 bool sse_registers_saved;
12612 rtx static_chain = NULL_RTX;
12613
12614 ix86_finalize_stack_realign_flags ();
12615
12616 /* DRAP should not coexist with stack_realign_fp */
12617 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12618
12619 memset (&m->fs, 0, sizeof (m->fs));
12620
12621 /* Initialize CFA state for before the prologue. */
12622 m->fs.cfa_reg = stack_pointer_rtx;
12623 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12624
12625 /* Track SP offset to the CFA. We continue tracking this after we've
12626 swapped the CFA register away from SP. In the case of re-alignment
12627 this is fudged; we're interested in offsets within the local frame. */
12628 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12629 m->fs.sp_valid = true;
12630
12631 ix86_compute_frame_layout (&frame);
12632
12633 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12634 {
12635 /* We should have already generated an error for any use of
12636 ms_hook on a nested function. */
12637 gcc_checking_assert (!ix86_static_chain_on_stack);
12638
12639 /* Check whether profiling is active and we shall use the
12640 profiling-before-prologue variant. If so, sorry. */
12641 if (crtl->profile && flag_fentry != 0)
12642 sorry ("ms_hook_prologue attribute isn%'t compatible "
12643 "with -mfentry for 32-bit");
12644
12645 /* In ix86_asm_output_function_label we emitted:
12646 8b ff movl.s %edi,%edi
12647 55 push %ebp
12648 8b ec movl.s %esp,%ebp
12649
12650 This matches the hookable function prologue in Win32 API
12651 functions in Microsoft Windows XP Service Pack 2 and newer.
12652 Wine uses this to enable Windows apps to hook the Win32 API
12653 functions provided by Wine.
12654
12655 What that means is that we've already set up the frame pointer. */
12656
12657 if (frame_pointer_needed
12658 && !(crtl->drap_reg && crtl->stack_realign_needed))
12659 {
12660 rtx push, mov;
12661
12662 /* We've decided to use the frame pointer already set up.
12663 Describe this to the unwinder by pretending that both
12664 push and mov insns happen right here.
12665
12666 Putting the unwind info here at the end of the ms_hook
12667 is done so that we can make absolutely certain we get
12668 the required byte sequence at the start of the function,
12669 rather than relying on an assembler that can produce
12670 the exact encoding required.
12671
12672 However it does mean (in the unpatched case) that we have
12673 a 1 insn window where the asynchronous unwind info is
12674 incorrect. However, if we placed the unwind info at
12675 its correct location we would have incorrect unwind info
12676 in the patched case. Which is probably all moot since
12677 I don't expect Wine to generate dwarf2 unwind info for the
12678 system libraries that use this feature. */
12679
12680 insn = emit_insn (gen_blockage ());
12681
12682 push = gen_push (hard_frame_pointer_rtx);
12683 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12684 stack_pointer_rtx);
12685 RTX_FRAME_RELATED_P (push) = 1;
12686 RTX_FRAME_RELATED_P (mov) = 1;
12687
12688 RTX_FRAME_RELATED_P (insn) = 1;
12689 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12690 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12691
12692 /* Note that gen_push incremented m->fs.cfa_offset, even
12693 though we didn't emit the push insn here. */
12694 m->fs.cfa_reg = hard_frame_pointer_rtx;
12695 m->fs.fp_offset = m->fs.cfa_offset;
12696 m->fs.fp_valid = true;
12697 }
12698 else
12699 {
12700 /* The frame pointer is not needed so pop %ebp again.
12701 This leaves us with a pristine state. */
12702 emit_insn (gen_pop (hard_frame_pointer_rtx));
12703 }
12704 }
12705
12706 /* The first insn of a function that accepts its static chain on the
12707 stack is to push the register that would be filled in by a direct
12708 call. This insn will be skipped by the trampoline. */
12709 else if (ix86_static_chain_on_stack)
12710 {
12711 static_chain = ix86_static_chain (cfun->decl, false);
12712 insn = emit_insn (gen_push (static_chain));
12713 emit_insn (gen_blockage ());
12714
12715 /* We don't want to interpret this push insn as a register save,
12716 only as a stack adjustment. The real copy of the register as
12717 a save will be done later, if needed. */
12718 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12719 t = gen_rtx_SET (stack_pointer_rtx, t);
12720 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12721 RTX_FRAME_RELATED_P (insn) = 1;
12722 }
12723
12724 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
12725 DRAP is needed and stack realignment is really needed after reload. */
12726 if (stack_realign_drap)
12727 {
12728 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12729
12730 /* Only need to push parameter pointer reg if it is caller saved. */
12731 if (!call_used_regs[REGNO (crtl->drap_reg)])
12732 {
12733 /* Push arg pointer reg */
12734 insn = emit_insn (gen_push (crtl->drap_reg));
12735 RTX_FRAME_RELATED_P (insn) = 1;
12736 }
12737
12738 /* Grab the argument pointer. */
12739 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12740 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12741 RTX_FRAME_RELATED_P (insn) = 1;
12742 m->fs.cfa_reg = crtl->drap_reg;
12743 m->fs.cfa_offset = 0;
12744
12745 /* Align the stack. */
12746 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12747 stack_pointer_rtx,
12748 GEN_INT (-align_bytes)));
12749 RTX_FRAME_RELATED_P (insn) = 1;
12750
12751 /* Replicate the return address on the stack so that return
12752 address can be reached via (argp - 1) slot. This is needed
12753 to implement macro RETURN_ADDR_RTX and intrinsic function
12754 expand_builtin_return_addr etc. */
12755 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12756 t = gen_frame_mem (word_mode, t);
12757 insn = emit_insn (gen_push (t));
12758 RTX_FRAME_RELATED_P (insn) = 1;
12759
12760 /* For the purposes of frame and register save area addressing,
12761 we've started over with a new frame. */
12762 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12763 m->fs.realigned = true;
12764
12765 if (static_chain)
12766 {
12767 /* Replicate static chain on the stack so that static chain
12768 can be reached via (argp - 2) slot. This is needed for
12769 nested function with stack realignment. */
12770 insn = emit_insn (gen_push (static_chain));
12771 RTX_FRAME_RELATED_P (insn) = 1;
12772 }
12773 }
12774
12775 int_registers_saved = (frame.nregs == 0);
12776 sse_registers_saved = (frame.nsseregs == 0);
12777
12778 if (frame_pointer_needed && !m->fs.fp_valid)
12779 {
12780 /* Note: AT&T enter does NOT have reversed args. Enter is probably
12781 slower on all targets. Also sdb doesn't like it. */
12782 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
12783 RTX_FRAME_RELATED_P (insn) = 1;
12784
12785 /* Push registers now, before setting the frame pointer
12786 on SEH target. */
12787 if (!int_registers_saved
12788 && TARGET_SEH
12789 && !frame.save_regs_using_mov)
12790 {
12791 ix86_emit_save_regs ();
12792 int_registers_saved = true;
12793 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12794 }
12795
12796 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
12797 {
12798 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12799 RTX_FRAME_RELATED_P (insn) = 1;
12800
12801 if (m->fs.cfa_reg == stack_pointer_rtx)
12802 m->fs.cfa_reg = hard_frame_pointer_rtx;
12803 m->fs.fp_offset = m->fs.sp_offset;
12804 m->fs.fp_valid = true;
12805 }
12806 }
12807
12808 if (!int_registers_saved)
12809 {
12810 /* If saving registers via PUSH, do so now. */
12811 if (!frame.save_regs_using_mov)
12812 {
12813 ix86_emit_save_regs ();
12814 int_registers_saved = true;
12815 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12816 }
12817
12818 /* When using the red zone we may start saving registers before allocating
12819 the stack frame, saving one cycle of the prologue. However, avoid
12820 doing this if we have to probe the stack; at least on x86_64 the
12821 stack probe can turn into a call that clobbers a red zone location. */
12822 else if (ix86_using_red_zone ()
12823 && (! TARGET_STACK_PROBE
12824 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
12825 {
12826 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
12827 int_registers_saved = true;
12828 }
12829 }
12830
12831 if (stack_realign_fp)
12832 {
12833 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12834 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
12835
12836 /* The computation of the size of the re-aligned stack frame means
12837 that we must allocate the size of the register save area before
12838 performing the actual alignment. Otherwise we cannot guarantee
12839 that there's enough storage above the realignment point. */
12840 if (m->fs.sp_offset != frame.sse_reg_save_offset)
12841 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12842 GEN_INT (m->fs.sp_offset
12843 - frame.sse_reg_save_offset),
12844 -1, false);
12845
12846 /* Align the stack. */
12847 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12848 stack_pointer_rtx,
12849 GEN_INT (-align_bytes)));
12850
12851 /* For the purposes of register save area addressing, the stack
12852 pointer is no longer valid. As for the value of sp_offset,
12853 see ix86_compute_frame_layout, which we need to match in order
12854 to pass verification of stack_pointer_offset at the end. */
12855 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
12856 m->fs.sp_valid = false;
12857 }
12858
12859 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12860
12861 if (flag_stack_usage_info)
12862 {
12863 /* We start to count from ARG_POINTER. */
12864 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
12865
12866 /* If it was realigned, take into account the fake frame. */
12867 if (stack_realign_drap)
12868 {
12869 if (ix86_static_chain_on_stack)
12870 stack_size += UNITS_PER_WORD;
12871
12872 if (!call_used_regs[REGNO (crtl->drap_reg)])
12873 stack_size += UNITS_PER_WORD;
12874
12875 /* This over-estimates by 1 minimal-stack-alignment-unit but
12876 mitigates that by counting in the new return address slot. */
12877 current_function_dynamic_stack_size
12878 += crtl->stack_alignment_needed / BITS_PER_UNIT;
12879 }
12880
12881 current_function_static_stack_size = stack_size;
12882 }
12883
12884 /* On SEH target with very large frame size, allocate an area to save
12885 SSE registers (as the very large allocation won't be described). */
12886 if (TARGET_SEH
12887 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
12888 && !sse_registers_saved)
12889 {
12890 HOST_WIDE_INT sse_size =
12891 frame.sse_reg_save_offset - frame.reg_save_offset;
12892
12893 gcc_assert (int_registers_saved);
12894
12895 /* No need to do stack checking as the area will be immediately
12896 written. */
12897 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12898 GEN_INT (-sse_size), -1,
12899 m->fs.cfa_reg == stack_pointer_rtx);
12900 allocate -= sse_size;
12901 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
12902 sse_registers_saved = true;
12903 }
12904
12905 /* The stack has already been decremented by the instruction calling us
12906 so probe if the size is non-negative to preserve the protection area. */
12907 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
12908 {
12909 /* We expect the registers to be saved when probes are used. */
12910 gcc_assert (int_registers_saved);
12911
12912 if (STACK_CHECK_MOVING_SP)
12913 {
12914 if (!(crtl->is_leaf && !cfun->calls_alloca
12915 && allocate <= PROBE_INTERVAL))
12916 {
12917 ix86_adjust_stack_and_probe (allocate);
12918 allocate = 0;
12919 }
12920 }
12921 else
12922 {
12923 HOST_WIDE_INT size = allocate;
12924
12925 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
12926 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
12927
12928 if (TARGET_STACK_PROBE)
12929 {
12930 if (crtl->is_leaf && !cfun->calls_alloca)
12931 {
12932 if (size > PROBE_INTERVAL)
12933 ix86_emit_probe_stack_range (0, size);
12934 }
12935 else
12936 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
12937 }
12938 else
12939 {
12940 if (crtl->is_leaf && !cfun->calls_alloca)
12941 {
12942 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
12943 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
12944 size - STACK_CHECK_PROTECT);
12945 }
12946 else
12947 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
12948 }
12949 }
12950 }
12951
12952 if (allocate == 0)
12953 ;
12954 else if (!ix86_target_stack_probe ()
12955 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
12956 {
12957 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12958 GEN_INT (-allocate), -1,
12959 m->fs.cfa_reg == stack_pointer_rtx);
12960 }
12961 else
12962 {
12963 rtx eax = gen_rtx_REG (Pmode, AX_REG);
12964 rtx r10 = NULL;
12965 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
12966 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
12967 bool eax_live = ix86_eax_live_at_start_p ();
12968 bool r10_live = false;
12969
12970 if (TARGET_64BIT)
12971 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
12972
12973 if (eax_live)
12974 {
12975 insn = emit_insn (gen_push (eax));
12976 allocate -= UNITS_PER_WORD;
12977 /* Note that SEH directives need to continue tracking the stack
12978 pointer even after the frame pointer has been set up. */
12979 if (sp_is_cfa_reg || TARGET_SEH)
12980 {
12981 if (sp_is_cfa_reg)
12982 m->fs.cfa_offset += UNITS_PER_WORD;
12983 RTX_FRAME_RELATED_P (insn) = 1;
12984 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12985 gen_rtx_SET (stack_pointer_rtx,
12986 plus_constant (Pmode, stack_pointer_rtx,
12987 -UNITS_PER_WORD)));
12988 }
12989 }
12990
12991 if (r10_live)
12992 {
12993 r10 = gen_rtx_REG (Pmode, R10_REG);
12994 insn = emit_insn (gen_push (r10));
12995 allocate -= UNITS_PER_WORD;
12996 if (sp_is_cfa_reg || TARGET_SEH)
12997 {
12998 if (sp_is_cfa_reg)
12999 m->fs.cfa_offset += UNITS_PER_WORD;
13000 RTX_FRAME_RELATED_P (insn) = 1;
13001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13002 gen_rtx_SET (stack_pointer_rtx,
13003 plus_constant (Pmode, stack_pointer_rtx,
13004 -UNITS_PER_WORD)));
13005 }
13006 }
13007
13008 emit_move_insn (eax, GEN_INT (allocate));
13009 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13010
13011 /* Use the fact that AX still contains ALLOCATE. */
13012 adjust_stack_insn = (Pmode == DImode
13013 ? gen_pro_epilogue_adjust_stack_di_sub
13014 : gen_pro_epilogue_adjust_stack_si_sub);
13015
13016 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13017 stack_pointer_rtx, eax));
13018
13019 if (sp_is_cfa_reg || TARGET_SEH)
13020 {
13021 if (sp_is_cfa_reg)
13022 m->fs.cfa_offset += allocate;
13023 RTX_FRAME_RELATED_P (insn) = 1;
13024 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13025 gen_rtx_SET (stack_pointer_rtx,
13026 plus_constant (Pmode, stack_pointer_rtx,
13027 -allocate)));
13028 }
13029 m->fs.sp_offset += allocate;
13030
13031 /* Use stack_pointer_rtx for relative addressing so that code
13032 works for realigned stack, too. */
13033 if (r10_live && eax_live)
13034 {
13035 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13036 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13037 gen_frame_mem (word_mode, t));
13038 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13039 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13040 gen_frame_mem (word_mode, t));
13041 }
13042 else if (eax_live || r10_live)
13043 {
13044 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13045 emit_move_insn (gen_rtx_REG (word_mode,
13046 (eax_live ? AX_REG : R10_REG)),
13047 gen_frame_mem (word_mode, t));
13048 }
13049 }
13050 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13051
13052 /* If we haven't already set up the frame pointer, do so now. */
13053 if (frame_pointer_needed && !m->fs.fp_valid)
13054 {
13055 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13056 GEN_INT (frame.stack_pointer_offset
13057 - frame.hard_frame_pointer_offset));
13058 insn = emit_insn (insn);
13059 RTX_FRAME_RELATED_P (insn) = 1;
13060 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13061
13062 if (m->fs.cfa_reg == stack_pointer_rtx)
13063 m->fs.cfa_reg = hard_frame_pointer_rtx;
13064 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13065 m->fs.fp_valid = true;
13066 }
13067
13068 if (!int_registers_saved)
13069 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13070 if (!sse_registers_saved)
13071 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13072
13073 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
13074 in the prologue. */
13075 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13076 {
13077 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13078 insn = emit_insn (gen_set_got (pic));
13079 RTX_FRAME_RELATED_P (insn) = 1;
13080 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13081 emit_insn (gen_prologue_use (pic));
13082 /* Delete an already emitted SET_GOT if it exists and was allocated to
13083 REAL_PIC_OFFSET_TABLE_REGNUM. */
13084 ix86_elim_entry_set_got (pic);
13085 }
13086
13087 if (crtl->drap_reg && !crtl->stack_realign_needed)
13088 {
13089 /* vDRAP has been set up, but after reload it turns out stack realignment
13090 isn't necessary; emit the prologue to set up DRAP
13091 without the stack realignment adjustment. */
13092 t = choose_baseaddr (0);
13093 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13094 }
13095
13096 /* Prevent instructions from being scheduled into register save push
13097 sequence when access to the redzone area is done through frame pointer.
13098 The offset between the frame pointer and the stack pointer is calculated
13099 relative to the value of the stack pointer at the end of the function
13100 prologue, and moving instructions that access redzone area via frame
13101 pointer inside push sequence violates this assumption. */
13102 if (frame_pointer_needed && frame.red_zone_size)
13103 emit_insn (gen_memory_blockage ());
13104
13105 /* Emit cld instruction if stringops are used in the function. */
13106 if (TARGET_CLD && ix86_current_function_needs_cld)
13107 emit_insn (gen_cld ());
13108
13109 /* SEH requires that the prologue end within 256 bytes of the start of
13110 the function. Prevent instruction schedules that would extend that.
13111 Further, prevent alloca modifications to the stack pointer from being
13112 combined with prologue modifications. */
13113 if (TARGET_SEH)
13114 emit_insn (gen_prologue_use (stack_pointer_rtx));
13115 }
13116
13117 /* Emit code to restore REG using a POP insn. */
13118
13119 static void
13120 ix86_emit_restore_reg_using_pop (rtx reg)
13121 {
13122 struct machine_function *m = cfun->machine;
13123 rtx_insn *insn = emit_insn (gen_pop (reg));
13124
13125 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13126 m->fs.sp_offset -= UNITS_PER_WORD;
13127
13128 if (m->fs.cfa_reg == crtl->drap_reg
13129 && REGNO (reg) == REGNO (crtl->drap_reg))
13130 {
13131 /* Previously we'd represented the CFA as an expression
13132 like *(%ebp - 8). We've just popped that value from
13133 the stack, which means we need to reset the CFA to
13134 the drap register. This will remain until we restore
13135 the stack pointer. */
13136 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13137 RTX_FRAME_RELATED_P (insn) = 1;
13138
13139 /* This means that the DRAP register is valid for addressing too. */
13140 m->fs.drap_valid = true;
13141 return;
13142 }
13143
13144 if (m->fs.cfa_reg == stack_pointer_rtx)
13145 {
13146 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13147 x = gen_rtx_SET (stack_pointer_rtx, x);
13148 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13149 RTX_FRAME_RELATED_P (insn) = 1;
13150
13151 m->fs.cfa_offset -= UNITS_PER_WORD;
13152 }
13153
13154 /* When the frame pointer is the CFA, and we pop it, we are
13155 swapping back to the stack pointer as the CFA. This happens
13156 for stack frames that don't allocate other data, so we assume
13157 the stack pointer is now pointing at the return address, i.e.
13158 the function entry state, which makes the offset one word. */
13159 if (reg == hard_frame_pointer_rtx)
13160 {
13161 m->fs.fp_valid = false;
13162 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13163 {
13164 m->fs.cfa_reg = stack_pointer_rtx;
13165 m->fs.cfa_offset -= UNITS_PER_WORD;
13166
13167 add_reg_note (insn, REG_CFA_DEF_CFA,
13168 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13169 GEN_INT (m->fs.cfa_offset)));
13170 RTX_FRAME_RELATED_P (insn) = 1;
13171 }
13172 }
13173 }
13174
13175 /* Emit code to restore saved registers using POP insns. */
13176
13177 static void
13178 ix86_emit_restore_regs_using_pop (void)
13179 {
13180 unsigned int regno;
13181
13182 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13183 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
13184 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13185 }
13186
13187 /* Emit code and notes for the LEAVE instruction. */
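/* LEAVE is equivalent to "mov %ebp, %esp" followed by "pop %ebp": it
   deallocates the local frame and restores the saved frame pointer in a
   single instruction, which is why the frame state below marks the stack
   pointer as valid again and the frame pointer as invalid.  */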
13188
13189 static void
13190 ix86_emit_leave (void)
13191 {
13192 struct machine_function *m = cfun->machine;
13193 rtx_insn *insn = emit_insn (ix86_gen_leave ());
13194
13195 ix86_add_queued_cfa_restore_notes (insn);
13196
13197 gcc_assert (m->fs.fp_valid);
13198 m->fs.sp_valid = true;
13199 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13200 m->fs.fp_valid = false;
13201
13202 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13203 {
13204 m->fs.cfa_reg = stack_pointer_rtx;
13205 m->fs.cfa_offset = m->fs.sp_offset;
13206
13207 add_reg_note (insn, REG_CFA_DEF_CFA,
13208 plus_constant (Pmode, stack_pointer_rtx,
13209 m->fs.sp_offset));
13210 RTX_FRAME_RELATED_P (insn) = 1;
13211 }
13212 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13213 m->fs.fp_offset);
13214 }
13215
13216 /* Emit code to restore saved registers using MOV insns.
13217 First register is restored from CFA - CFA_OFFSET. */
13218 static void
13219 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13220 bool maybe_eh_return)
13221 {
13222 struct machine_function *m = cfun->machine;
13223 unsigned int regno;
13224
13225 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13226 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13227 {
13228 rtx reg = gen_rtx_REG (word_mode, regno);
13229 rtx mem;
13230 rtx_insn *insn;
13231
13232 mem = choose_baseaddr (cfa_offset);
13233 mem = gen_frame_mem (word_mode, mem);
13234 insn = emit_move_insn (reg, mem);
13235
13236 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13237 {
13238 /* Previously we'd represented the CFA as an expression
13239 like *(%ebp - 8). We've just loaded that value from
13240 the stack, which means we need to reset the CFA to
13241 the drap register. This will remain until we restore
13242 the stack pointer. */
13243 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13244 RTX_FRAME_RELATED_P (insn) = 1;
13245
13246 /* This means that the DRAP register is valid for addressing. */
13247 m->fs.drap_valid = true;
13248 }
13249 else
13250 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13251
13252 cfa_offset -= UNITS_PER_WORD;
13253 }
13254 }
13255
13256 /* Emit code to restore saved SSE registers using MOV insns.
13257 First register is restored from CFA - CFA_OFFSET. */
13258 static void
13259 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13260 bool maybe_eh_return)
13261 {
13262 unsigned int regno;
13263
13264 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13265 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13266 {
13267 rtx reg = gen_rtx_REG (V4SFmode, regno);
13268 rtx mem;
13269 unsigned int align;
13270
13271 mem = choose_baseaddr (cfa_offset);
13272 mem = gen_rtx_MEM (V4SFmode, mem);
13273
13274 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
13275 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
13276 set_mem_align (mem, align);
13277
13278 /* SSE saves are not within the re-aligned local stack frame.
13279 If INCOMING_STACK_BOUNDARY is below the 128-bit vector alignment,
13280 we have to emit an unaligned load. */
13281 if (align < 128)
13282 {
13283 rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
13284 UNSPEC_LOADU);
13285 emit_insn (gen_rtx_SET (reg, unspec));
13286 }
13287 else
13288 emit_insn (gen_rtx_SET (reg, mem));
13289
13290 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13291
13292 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13293 }
13294 }
13295
13296 /* Restore function stack, frame, and registers. */
13297
13298 void
13299 ix86_expand_epilogue (int style)
13300 {
13301 struct machine_function *m = cfun->machine;
13302 struct machine_frame_state frame_state_save = m->fs;
13303 struct ix86_frame frame;
13304 bool restore_regs_via_mov;
13305 bool using_drap;
13306
13307 ix86_finalize_stack_realign_flags ();
13308 ix86_compute_frame_layout (&frame);
13309
13310 m->fs.sp_valid = (!frame_pointer_needed
13311 || (crtl->sp_is_unchanging
13312 && !stack_realign_fp));
13313 gcc_assert (!m->fs.sp_valid
13314 || m->fs.sp_offset == frame.stack_pointer_offset);
13315
13316 /* The FP must be valid if the frame pointer is present. */
13317 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13318 gcc_assert (!m->fs.fp_valid
13319 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13320
13321 /* We must have *some* valid pointer to the stack frame. */
13322 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13323
13324 /* The DRAP is never valid at this point. */
13325 gcc_assert (!m->fs.drap_valid);
13326
13327 /* See the comment about red zone and frame
13328 pointer usage in ix86_expand_prologue. */
13329 if (frame_pointer_needed && frame.red_zone_size)
13330 emit_insn (gen_memory_blockage ());
13331
13332 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13333 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13334
13335 /* Determine the CFA offset of the end of the red-zone. */
13336 m->fs.red_zone_offset = 0;
13337 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13338 {
13339 /* The red-zone begins below the return address. */
13340 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
13341
13342 /* When the register save area is in the aligned portion of
13343 the stack, determine the maximum runtime displacement that
13344 matches up with the aligned frame. */
13345 if (stack_realign_drap)
13346 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13347 + UNITS_PER_WORD);
13348 }
13349
13350 /* Special care must be taken for the normal return case of a function
13351 using eh_return: the eax and edx registers are marked as saved, but
13352 not restored along this path. Adjust the save location to match. */
13353 if (crtl->calls_eh_return && style != 2)
13354 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
13355
13356 /* EH_RETURN requires the use of moves to function properly. */
13357 if (crtl->calls_eh_return)
13358 restore_regs_via_mov = true;
13359 /* SEH requires the use of pops to identify the epilogue. */
13360 else if (TARGET_SEH)
13361 restore_regs_via_mov = false;
13362 /* If we're only restoring one register and sp is not valid then
13363 use a move instruction to restore the register, since it's
13364 less work than reloading sp and popping the register. */
13365 else if (!m->fs.sp_valid && frame.nregs <= 1)
13366 restore_regs_via_mov = true;
13367 else if (TARGET_EPILOGUE_USING_MOVE
13368 && cfun->machine->use_fast_prologue_epilogue
13369 && (frame.nregs > 1
13370 || m->fs.sp_offset != frame.reg_save_offset))
13371 restore_regs_via_mov = true;
13372 else if (frame_pointer_needed
13373 && !frame.nregs
13374 && m->fs.sp_offset != frame.reg_save_offset)
13375 restore_regs_via_mov = true;
13376 else if (frame_pointer_needed
13377 && TARGET_USE_LEAVE
13378 && cfun->machine->use_fast_prologue_epilogue
13379 && frame.nregs == 1)
13380 restore_regs_via_mov = true;
13381 else
13382 restore_regs_via_mov = false;
13383
13384 if (restore_regs_via_mov || frame.nsseregs)
13385 {
13386 /* Ensure that the entire register save area is addressable via
13387 the stack pointer, if we will restore via sp. */
13388 if (TARGET_64BIT
13389 && m->fs.sp_offset > 0x7fffffff
13390 && !(m->fs.fp_valid || m->fs.drap_valid)
13391 && (frame.nsseregs + frame.nregs) != 0)
13392 {
13393 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13394 GEN_INT (m->fs.sp_offset
13395 - frame.sse_reg_save_offset),
13396 style,
13397 m->fs.cfa_reg == stack_pointer_rtx);
13398 }
13399 }
13400
13401 /* If there are any SSE registers to restore, then we have to do it
13402 via moves, since there's obviously no pop for SSE regs. */
13403 if (frame.nsseregs)
13404 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13405 style == 2);
13406
13407 if (restore_regs_via_mov)
13408 {
13409 rtx t;
13410
13411 if (frame.nregs)
13412 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13413
13414 /* eh_return epilogues need %ecx added to the stack pointer. */
13415 if (style == 2)
13416 {
13417 rtx sa = EH_RETURN_STACKADJ_RTX;
13418 rtx_insn *insn;
13419
13420 /* Stack align doesn't work with eh_return. */
13421 gcc_assert (!stack_realign_drap);
13422 /* Neither do regparm nested functions. */
13423 gcc_assert (!ix86_static_chain_on_stack);
13424
13425 if (frame_pointer_needed)
13426 {
13427 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13428 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13429 emit_insn (gen_rtx_SET (sa, t));
13430
13431 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13432 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13433
13434 /* Note that we use SA as a temporary CFA, as the return
13435 address is at the proper place relative to it. We
13436 pretend this happens at the FP restore insn because
13437 prior to this insn the FP would be stored at the wrong
13438 offset relative to SA, and after this insn we have no
13439 other reasonable register to use for the CFA. We don't
13440 bother resetting the CFA to the SP for the duration of
13441 the return insn. */
13442 add_reg_note (insn, REG_CFA_DEF_CFA,
13443 plus_constant (Pmode, sa, UNITS_PER_WORD));
13444 ix86_add_queued_cfa_restore_notes (insn);
13445 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13446 RTX_FRAME_RELATED_P (insn) = 1;
13447
13448 m->fs.cfa_reg = sa;
13449 m->fs.cfa_offset = UNITS_PER_WORD;
13450 m->fs.fp_valid = false;
13451
13452 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13453 const0_rtx, style, false);
13454 }
13455 else
13456 {
13457 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13458 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13459 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13460 ix86_add_queued_cfa_restore_notes (insn);
13461
13462 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13463 if (m->fs.cfa_offset != UNITS_PER_WORD)
13464 {
13465 m->fs.cfa_offset = UNITS_PER_WORD;
13466 add_reg_note (insn, REG_CFA_DEF_CFA,
13467 plus_constant (Pmode, stack_pointer_rtx,
13468 UNITS_PER_WORD));
13469 RTX_FRAME_RELATED_P (insn) = 1;
13470 }
13471 }
13472 m->fs.sp_offset = UNITS_PER_WORD;
13473 m->fs.sp_valid = true;
13474 }
13475 }
13476 else
13477 {
13478 /* SEH requires that the function end with (1) a stack adjustment
13479 if necessary, (2) a sequence of pops, and (3) a return or
13480 jump instruction. Prevent insns from the function body from
13481 being scheduled into this sequence. */
13482 if (TARGET_SEH)
13483 {
13484 /* Prevent a catch region from being adjacent to the standard
13485 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
13486 several other flags that would be interesting to test are
13487 set up yet. */
13488 if (flag_non_call_exceptions)
13489 emit_insn (gen_nops (const1_rtx));
13490 else
13491 emit_insn (gen_blockage ());
13492 }
13493
13494 /* The first step is to deallocate the stack frame so that we can
13495 pop the registers. Also do it on SEH targets for very large
13496 frames, as the emitted instructions aren't allowed by the ABI in
13497 epilogues. */
13498 if (!m->fs.sp_valid
13499 || (TARGET_SEH
13500 && (m->fs.sp_offset - frame.reg_save_offset
13501 >= SEH_MAX_FRAME_SIZE)))
13502 {
13503 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13504 GEN_INT (m->fs.fp_offset
13505 - frame.reg_save_offset),
13506 style, false);
13507 }
13508 else if (m->fs.sp_offset != frame.reg_save_offset)
13509 {
13510 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13511 GEN_INT (m->fs.sp_offset
13512 - frame.reg_save_offset),
13513 style,
13514 m->fs.cfa_reg == stack_pointer_rtx);
13515 }
13516
13517 ix86_emit_restore_regs_using_pop ();
13518 }
13519
13520 /* If we used a frame pointer and haven't already got rid of it,
13521 then do so now. */
13522 if (m->fs.fp_valid)
13523 {
13524 /* If the stack pointer is valid and pointing at the frame
13525 pointer store address, then we only need a pop. */
13526 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
13527 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13528 /* The 'leave' instruction results in shorter dependency chains on
13529 CPUs that are able to execute it fast. */
13530 else if (TARGET_USE_LEAVE
13531 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13532 || !cfun->machine->use_fast_prologue_epilogue)
13533 ix86_emit_leave ();
13534 else
13535 {
13536 pro_epilogue_adjust_stack (stack_pointer_rtx,
13537 hard_frame_pointer_rtx,
13538 const0_rtx, style, !using_drap);
13539 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13540 }
13541 }
13542
13543 if (using_drap)
13544 {
13545 int param_ptr_offset = UNITS_PER_WORD;
13546 rtx_insn *insn;
13547
13548 gcc_assert (stack_realign_drap);
13549
13550 if (ix86_static_chain_on_stack)
13551 param_ptr_offset += UNITS_PER_WORD;
13552 if (!call_used_regs[REGNO (crtl->drap_reg)])
13553 param_ptr_offset += UNITS_PER_WORD;
13554
13555 insn = emit_insn (gen_rtx_SET
13556 (stack_pointer_rtx,
13557 gen_rtx_PLUS (Pmode,
13558 crtl->drap_reg,
13559 GEN_INT (-param_ptr_offset))));
13560 m->fs.cfa_reg = stack_pointer_rtx;
13561 m->fs.cfa_offset = param_ptr_offset;
13562 m->fs.sp_offset = param_ptr_offset;
13563 m->fs.realigned = false;
13564
13565 add_reg_note (insn, REG_CFA_DEF_CFA,
13566 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13567 GEN_INT (param_ptr_offset)));
13568 RTX_FRAME_RELATED_P (insn) = 1;
13569
13570 if (!call_used_regs[REGNO (crtl->drap_reg)])
13571 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
13572 }
13573
13574 /* At this point the stack pointer must be valid, and we must have
13575 restored all of the registers. We may not have deallocated the
13576 entire stack frame. We've delayed this until now because it may
13577 be possible to merge the local stack deallocation with the
13578 deallocation forced by ix86_static_chain_on_stack. */
13579 gcc_assert (m->fs.sp_valid);
13580 gcc_assert (!m->fs.fp_valid);
13581 gcc_assert (!m->fs.realigned);
13582 if (m->fs.sp_offset != UNITS_PER_WORD)
13583 {
13584 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13585 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
13586 style, true);
13587 }
13588 else
13589 ix86_add_queued_cfa_restore_notes (get_last_insn ());
13590
13591 /* Sibcall epilogues don't want a return instruction. */
13592 if (style == 0)
13593 {
13594 m->fs = frame_state_save;
13595 return;
13596 }
13597
13598 if (crtl->args.pops_args && crtl->args.size)
13599 {
13600 rtx popc = GEN_INT (crtl->args.pops_args);
13601
13602 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
13603 address, do an explicit add, and jump indirectly to the caller. */
13604
13605 if (crtl->args.pops_args >= 65536)
13606 {
13607 rtx ecx = gen_rtx_REG (SImode, CX_REG);
13608 rtx_insn *insn;
13609
13610 /* There is no "pascal" calling convention in any 64bit ABI. */
13611 gcc_assert (!TARGET_64BIT);
13612
13613 insn = emit_insn (gen_pop (ecx));
13614 m->fs.cfa_offset -= UNITS_PER_WORD;
13615 m->fs.sp_offset -= UNITS_PER_WORD;
13616
13617 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13618 x = gen_rtx_SET (stack_pointer_rtx, x);
13619 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13620 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
13621 RTX_FRAME_RELATED_P (insn) = 1;
13622
13623 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13624 popc, -1, true);
13625 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
13626 }
13627 else
13628 emit_jump_insn (gen_simple_return_pop_internal (popc));
13629 }
13630 else
13631 emit_jump_insn (gen_simple_return_internal ());
13632
13633 /* Restore the state back to the state from the prologue,
13634 so that it's correct for the next epilogue. */
13635 m->fs = frame_state_save;
13636 }
13637
13638 /* Reset from the function's potential modifications. */
13639
13640 static void
13641 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
13642 {
13643 if (pic_offset_table_rtx
13644 && !ix86_use_pseudo_pic_reg ())
13645 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
13646 #if TARGET_MACHO
13647 /* Mach-O doesn't support labels at the end of objects, so if
13648 it looks like we might want one, insert a NOP. */
13649 {
13650 rtx_insn *insn = get_last_insn ();
13651 rtx_insn *deleted_debug_label = NULL;
13652 while (insn
13653 && NOTE_P (insn)
13654 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
13655 {
13656 /* If we only find NOTE_INSN_DELETED_DEBUG_LABEL notes, don't
13657 insert a nop; instead set their CODE_LABEL_NUMBER to -1,
13658 otherwise there would be code generation differences
13659 between -g and -g0. */
13660 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13661 deleted_debug_label = insn;
13662 insn = PREV_INSN (insn);
13663 }
13664 if (insn
13665 && (LABEL_P (insn)
13666 || (NOTE_P (insn)
13667 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
13668 fputs ("\tnop\n", file);
13669 else if (deleted_debug_label)
13670 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
13671 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13672 CODE_LABEL_NUMBER (insn) = -1;
13673 }
13674 #endif
13675
13676 }
13677
13678 /* Return a scratch register to use in the split stack prologue. The
13679 split stack prologue is used for -fsplit-stack. It consists of the
13680 first instructions in the function, even before the regular prologue.
13681 The scratch register can be any caller-saved register which is not
13682 used for parameters or for the static chain. */
13683
13684 static unsigned int
13685 split_stack_prologue_scratch_regno (void)
13686 {
13687 if (TARGET_64BIT)
13688 return R11_REG;
13689 else
13690 {
13691 bool is_fastcall, is_thiscall;
13692 int regparm;
13693
13694 is_fastcall = (lookup_attribute ("fastcall",
13695 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13696 != NULL);
13697 is_thiscall = (lookup_attribute ("thiscall",
13698 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13699 != NULL);
13700 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
13701
13702 if (is_fastcall)
13703 {
13704 if (DECL_STATIC_CHAIN (cfun->decl))
13705 {
13706 sorry ("-fsplit-stack does not support fastcall with "
13707 "nested function");
13708 return INVALID_REGNUM;
13709 }
13710 return AX_REG;
13711 }
13712 else if (is_thiscall)
13713 {
13714 if (!DECL_STATIC_CHAIN (cfun->decl))
13715 return DX_REG;
13716 return AX_REG;
13717 }
13718 else if (regparm < 3)
13719 {
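/* With fewer than 3 register parameters, %ecx is normally free; a nested
   function's static chain lives in %ecx, though, so fall back to %edx in
   that case. */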
13720 if (!DECL_STATIC_CHAIN (cfun->decl))
13721 return CX_REG;
13722 else
13723 {
13724 if (regparm >= 2)
13725 {
13726 sorry ("-fsplit-stack does not support 2 register "
13727 "parameters for a nested function");
13728 return INVALID_REGNUM;
13729 }
13730 return DX_REG;
13731 }
13732 }
13733 else
13734 {
13735 /* FIXME: We could make this work by pushing a register
13736 around the addition and comparison. */
13737 sorry ("-fsplit-stack does not support 3 register parameters");
13738 return INVALID_REGNUM;
13739 }
13740 }
13741 }
13742
13743 /* A SYMBOL_REF for the function which allocates new stack space for
13744 -fsplit-stack. */
13745
13746 static GTY(()) rtx split_stack_fn;
13747
13748 /* A SYMBOL_REF for the variant of __morestack used with the large
13749 code model. */
13750
13751 static GTY(()) rtx split_stack_fn_large;
13752
13753 /* Handle -fsplit-stack. These are the first instructions in the
13754 function, even before the regular prologue. */
13755
13756 void
13757 ix86_expand_split_stack_prologue (void)
13758 {
13759 struct ix86_frame frame;
13760 HOST_WIDE_INT allocate;
13761 unsigned HOST_WIDE_INT args_size;
13762 rtx_code_label *label;
13763 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
13764 rtx scratch_reg = NULL_RTX;
13765 rtx_code_label *varargs_label = NULL;
13766 rtx fn;
13767
13768 gcc_assert (flag_split_stack && reload_completed);
13769
13770 ix86_finalize_stack_realign_flags ();
13771 ix86_compute_frame_layout (&frame);
13772 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
13773
13774 /* This is the label we will branch to if we have enough stack
13775 space. We expect the basic block reordering pass to reverse this
13776 branch if optimizing, so that we branch in the unlikely case. */
13777 label = gen_label_rtx ();
13778
13779 /* We need to compare the stack pointer minus the frame size with
13780 the stack boundary in the TCB. The stack boundary always gives
13781 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
13782 can compare directly. Otherwise we need to do an addition. */
13783
13784 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13785 UNSPEC_STACK_CHECK);
13786 limit = gen_rtx_CONST (Pmode, limit);
13787 limit = gen_rtx_MEM (Pmode, limit);
13788 if (allocate < SPLIT_STACK_AVAILABLE)
13789 current = stack_pointer_rtx;
13790 else
13791 {
13792 unsigned int scratch_regno;
13793 rtx offset;
13794
13795 /* We need a scratch register to hold the stack pointer minus
13796 the required frame size. Since this is the very start of the
13797 function, the scratch register can be any caller-saved
13798 register which is not used for parameters. */
13799 offset = GEN_INT (- allocate);
13800 scratch_regno = split_stack_prologue_scratch_regno ();
13801 if (scratch_regno == INVALID_REGNUM)
13802 return;
13803 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13804 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
13805 {
13806 /* We don't use ix86_gen_add3 in this case because it will
13807 want to split to lea, but when not optimizing the insn
13808 will not be split after this point. */
13809 emit_insn (gen_rtx_SET (scratch_reg,
13810 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13811 offset)));
13812 }
13813 else
13814 {
13815 emit_move_insn (scratch_reg, offset);
13816 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
13817 stack_pointer_rtx));
13818 }
13819 current = scratch_reg;
13820 }
13821
13822 ix86_expand_branch (GEU, current, limit, label);
13823 jump_insn = get_last_insn ();
13824 JUMP_LABEL (jump_insn) = label;
13825
13826 /* Mark the jump as very likely to be taken. */
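/* REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 corresponds to a 99%
   probability of taking the branch. */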
13827 add_int_reg_note (jump_insn, REG_BR_PROB,
13828 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
13829
13830 if (split_stack_fn == NULL_RTX)
13831 {
13832 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
13833 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
13834 }
13835 fn = split_stack_fn;
13836
13837 /* Get more stack space. We pass in the desired stack space and the
13838 size of the arguments to copy to the new stack. In 32-bit mode
13839 we push the parameters; __morestack will return on a new stack
13840 anyhow. In 64-bit mode we pass the parameters in r10 and
13841 r11. */
13842 allocate_rtx = GEN_INT (allocate);
13843 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
13844 call_fusage = NULL_RTX;
13845 if (TARGET_64BIT)
13846 {
13847 rtx reg10, reg11;
13848
13849 reg10 = gen_rtx_REG (Pmode, R10_REG);
13850 reg11 = gen_rtx_REG (Pmode, R11_REG);
13851
13852 /* If this function uses a static chain, it will be in %r10.
13853 Preserve it across the call to __morestack. */
13854 if (DECL_STATIC_CHAIN (cfun->decl))
13855 {
13856 rtx rax;
13857
13858 rax = gen_rtx_REG (word_mode, AX_REG);
13859 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
13860 use_reg (&call_fusage, rax);
13861 }
13862
13863 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
13864 && !TARGET_PECOFF)
13865 {
13866 HOST_WIDE_INT argval;
13867
13868 gcc_assert (Pmode == DImode);
13869 /* When using the large model we need to load the address
13870 into a register, and we've run out of registers. So we
13871 switch to a different calling convention, and we call a
13872 different function: __morestack_large. We pass the
13873 argument size in the upper 32 bits of r10 and pass the
13874 frame size in the lower 32 bits. */
13875 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
13876 gcc_assert ((args_size & 0xffffffff) == args_size);
13877
13878 if (split_stack_fn_large == NULL_RTX)
13879 {
13880 split_stack_fn_large =
13881 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
13882 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
13883 }
13884 if (ix86_cmodel == CM_LARGE_PIC)
13885 {
13886 rtx_code_label *label;
13887 rtx x;
13888
13889 label = gen_label_rtx ();
13890 emit_label (label);
13891 LABEL_PRESERVE_P (label) = 1;
13892 emit_insn (gen_set_rip_rex64 (reg10, label));
13893 emit_insn (gen_set_got_offset_rex64 (reg11, label));
13894 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
13895 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
13896 UNSPEC_GOT);
13897 x = gen_rtx_CONST (Pmode, x);
13898 emit_move_insn (reg11, x);
13899 x = gen_rtx_PLUS (Pmode, reg10, reg11);
13900 x = gen_const_mem (Pmode, x);
13901 emit_move_insn (reg11, x);
13902 }
13903 else
13904 emit_move_insn (reg11, split_stack_fn_large);
13905
13906 fn = reg11;
13907
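/* args_size goes in the upper 32 bits, allocate in the lower 32 bits.
   The double shift by 16 keeps each shift count below 32, presumably to
   stay well defined even if HOST_WIDE_INT were only 32 bits wide. */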
13908 argval = ((args_size << 16) << 16) + allocate;
13909 emit_move_insn (reg10, GEN_INT (argval));
13910 }
13911 else
13912 {
13913 emit_move_insn (reg10, allocate_rtx);
13914 emit_move_insn (reg11, GEN_INT (args_size));
13915 use_reg (&call_fusage, reg11);
13916 }
13917
13918 use_reg (&call_fusage, reg10);
13919 }
13920 else
13921 {
13922 emit_insn (gen_push (GEN_INT (args_size)));
13923 emit_insn (gen_push (allocate_rtx));
13924 }
13925 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
13926 GEN_INT (UNITS_PER_WORD), constm1_rtx,
13927 NULL_RTX, false);
13928 add_function_usage_to (call_insn, call_fusage);
13929
13930 /* In order to make call/return prediction work right, we now need
13931 to execute a return instruction. See
13932 libgcc/config/i386/morestack.S for the details on how this works.
13933
13934 For flow purposes gcc must not see this as a return
13935 instruction--we need control flow to continue at the subsequent
13936 label. Therefore, we use an unspec. */
13937 gcc_assert (crtl->args.pops_args < 65536);
13938 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
13939
13940 /* If we are in 64-bit mode and this function uses a static chain,
13941 we saved %r10 in %rax before calling __morestack. */
13942 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
13943 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13944 gen_rtx_REG (word_mode, AX_REG));
13945
13946 /* If this function calls va_start, we need to store a pointer to
13947 the arguments on the old stack, because they may not have been
13948 all copied to the new stack. At this point the old stack can be
13949 found at the frame pointer value used by __morestack, because
13950 __morestack has set that up before calling back to us. Here we
13951 store that pointer in a scratch register, and in
13952 ix86_expand_prologue we store the scratch register in a stack
13953 slot. */
13954 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13955 {
13956 unsigned int scratch_regno;
13957 rtx frame_reg;
13958 int words;
13959
13960 scratch_regno = split_stack_prologue_scratch_regno ();
13961 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13962 frame_reg = gen_rtx_REG (Pmode, BP_REG);
13963
13964 /* 64-bit:
13965 fp -> old fp value
13966 return address within this function
13967 return address of caller of this function
13968 stack arguments
13969 So we add three words to get to the stack arguments.
13970
13971 32-bit:
13972 fp -> old fp value
13973 return address within this function
13974 first argument to __morestack
13975 second argument to __morestack
13976 return address of caller of this function
13977 stack arguments
13978 So we add five words to get to the stack arguments.
13979 */
13980 words = TARGET_64BIT ? 3 : 5;
13981 emit_insn (gen_rtx_SET (scratch_reg,
13982 gen_rtx_PLUS (Pmode, frame_reg,
13983 GEN_INT (words * UNITS_PER_WORD))));
13984
13985 varargs_label = gen_label_rtx ();
13986 emit_jump_insn (gen_jump (varargs_label));
13987 JUMP_LABEL (get_last_insn ()) = varargs_label;
13988
13989 emit_barrier ();
13990 }
13991
13992 emit_label (label);
13993 LABEL_NUSES (label) = 1;
13994
13995 /* If this function calls va_start, we now have to set the scratch
13996 register for the case where we do not call __morestack. In this
13997 case we need to set it based on the stack pointer. */
13998 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13999 {
14000 emit_insn (gen_rtx_SET (scratch_reg,
14001 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14002 GEN_INT (UNITS_PER_WORD))));
14003
14004 emit_label (varargs_label);
14005 LABEL_NUSES (varargs_label) = 1;
14006 }
14007 }
14008
14009 /* We may have to tell the dataflow pass that the split stack prologue
14010 is initializing a scratch register. */
14011
14012 static void
14013 ix86_live_on_entry (bitmap regs)
14014 {
14015 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14016 {
14017 gcc_assert (flag_split_stack);
14018 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14019 }
14020 }
14021 \f
14022 /* Extract the parts of an RTL expression that is a valid memory address
14023 for an instruction. Return 0 if the structure of the address is
14024 grossly off. Return -1 if the address contains ASHIFT, so it is not
14025 strictly valid, but is still used for computing the length of the lea instruction. */
14026
14027 int
14028 ix86_decompose_address (rtx addr, struct ix86_address *out)
14029 {
14030 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14031 rtx base_reg, index_reg;
14032 HOST_WIDE_INT scale = 1;
14033 rtx scale_rtx = NULL_RTX;
14034 rtx tmp;
14035 int retval = 1;
14036 addr_space_t seg = ADDR_SPACE_GENERIC;
14037
14038 /* Allow zero-extended SImode addresses;
14039 they will be emitted with the addr32 prefix. */
14040 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14041 {
14042 if (GET_CODE (addr) == ZERO_EXTEND
14043 && GET_MODE (XEXP (addr, 0)) == SImode)
14044 {
14045 addr = XEXP (addr, 0);
14046 if (CONST_INT_P (addr))
14047 return 0;
14048 }
14049 else if (GET_CODE (addr) == AND
14050 && const_32bit_mask (XEXP (addr, 1), DImode))
14051 {
14052 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
14053 if (addr == NULL_RTX)
14054 return 0;
14055
14056 if (CONST_INT_P (addr))
14057 return 0;
14058 }
14059 }
14060
14061 /* Allow SImode subregs of DImode addresses;
14062 they will be emitted with the addr32 prefix. */
14063 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14064 {
14065 if (SUBREG_P (addr)
14066 && GET_MODE (SUBREG_REG (addr)) == DImode)
14067 {
14068 addr = SUBREG_REG (addr);
14069 if (CONST_INT_P (addr))
14070 return 0;
14071 }
14072 }
14073
14074 if (REG_P (addr))
14075 base = addr;
14076 else if (SUBREG_P (addr))
14077 {
14078 if (REG_P (SUBREG_REG (addr)))
14079 base = addr;
14080 else
14081 return 0;
14082 }
14083 else if (GET_CODE (addr) == PLUS)
14084 {
14085 rtx addends[4], op;
14086 int n = 0, i;
14087
14088 op = addr;
14089 do
14090 {
14091 if (n >= 4)
14092 return 0;
14093 addends[n++] = XEXP (op, 1);
14094 op = XEXP (op, 0);
14095 }
14096 while (GET_CODE (op) == PLUS);
14097 if (n >= 4)
14098 return 0;
14099 addends[n] = op;
14100
14101 for (i = n; i >= 0; --i)
14102 {
14103 op = addends[i];
14104 switch (GET_CODE (op))
14105 {
14106 case MULT:
14107 if (index)
14108 return 0;
14109 index = XEXP (op, 0);
14110 scale_rtx = XEXP (op, 1);
14111 break;
14112
14113 case ASHIFT:
14114 if (index)
14115 return 0;
14116 index = XEXP (op, 0);
14117 tmp = XEXP (op, 1);
14118 if (!CONST_INT_P (tmp))
14119 return 0;
14120 scale = INTVAL (tmp);
14121 if ((unsigned HOST_WIDE_INT) scale > 3)
14122 return 0;
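/* The shift count is log2 of the scale; only shift counts 0..3
   (scales 1, 2, 4 and 8) can be encoded in a SIB byte. */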
14123 scale = 1 << scale;
14124 break;
14125
14126 case ZERO_EXTEND:
14127 op = XEXP (op, 0);
14128 if (GET_CODE (op) != UNSPEC)
14129 return 0;
14130 /* FALLTHRU */
14131
14132 case UNSPEC:
14133 if (XINT (op, 1) == UNSPEC_TP
14134 && TARGET_TLS_DIRECT_SEG_REFS
14135 && seg == ADDR_SPACE_GENERIC)
14136 seg = DEFAULT_TLS_SEG_REG;
14137 else
14138 return 0;
14139 break;
14140
14141 case SUBREG:
14142 if (!REG_P (SUBREG_REG (op)))
14143 return 0;
14144 /* FALLTHRU */
14145
14146 case REG:
14147 if (!base)
14148 base = op;
14149 else if (!index)
14150 index = op;
14151 else
14152 return 0;
14153 break;
14154
14155 case CONST:
14156 case CONST_INT:
14157 case SYMBOL_REF:
14158 case LABEL_REF:
14159 if (disp)
14160 return 0;
14161 disp = op;
14162 break;
14163
14164 default:
14165 return 0;
14166 }
14167 }
14168 }
14169 else if (GET_CODE (addr) == MULT)
14170 {
14171 index = XEXP (addr, 0); /* index*scale */
14172 scale_rtx = XEXP (addr, 1);
14173 }
14174 else if (GET_CODE (addr) == ASHIFT)
14175 {
14176 /* We're called for lea too, which implements ashift on occasion. */
14177 index = XEXP (addr, 0);
14178 tmp = XEXP (addr, 1);
14179 if (!CONST_INT_P (tmp))
14180 return 0;
14181 scale = INTVAL (tmp);
14182 if ((unsigned HOST_WIDE_INT) scale > 3)
14183 return 0;
14184 scale = 1 << scale;
14185 retval = -1;
14186 }
14187 else
14188 disp = addr; /* displacement */
14189
14190 if (index)
14191 {
14192 if (REG_P (index))
14193 ;
14194 else if (SUBREG_P (index)
14195 && REG_P (SUBREG_REG (index)))
14196 ;
14197 else
14198 return 0;
14199 }
14200
14201 /* Extract the integral value of scale. */
14202 if (scale_rtx)
14203 {
14204 if (!CONST_INT_P (scale_rtx))
14205 return 0;
14206 scale = INTVAL (scale_rtx);
14207 }
14208
14209 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14210 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14211
14212 /* Avoid useless 0 displacement. */
14213 if (disp == const0_rtx && (base || index))
14214 disp = NULL_RTX;
14215
14216 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
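/* %esp can never be an index in the SIB encoding, and the arg and frame
   pointers may be eliminated to it, so move such a register into the base
   position. */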
14217 if (base_reg && index_reg && scale == 1
14218 && (index_reg == arg_pointer_rtx
14219 || index_reg == frame_pointer_rtx
14220 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
14221 {
14222 std::swap (base, index);
14223 std::swap (base_reg, index_reg);
14224 }
14225
14226 /* Special case: %ebp cannot be encoded as a base without a displacement.
14227 Similarly %r13. */
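/* (With mod=00, a base register field of 101b is interpreted as disp32
   with no base, so a zero displacement has to be emitted explicitly.) */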
14228 if (!disp
14229 && base_reg
14230 && (base_reg == hard_frame_pointer_rtx
14231 || base_reg == frame_pointer_rtx
14232 || base_reg == arg_pointer_rtx
14233 || (REG_P (base_reg)
14234 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
14235 || REGNO (base_reg) == R13_REG))))
14236 disp = const0_rtx;
14237
14238 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
14239 Avoid this by transforming to [%esi+0].
14240 Reload calls address legitimization without cfun defined, so we need
14241 to test cfun for being non-NULL. */
14242 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14243 && base_reg && !index_reg && !disp
14244 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
14245 disp = const0_rtx;
14246
14247 /* Special case: encode reg+reg instead of reg*2. */
14248 if (!base && index && scale == 2)
14249 base = index, base_reg = index_reg, scale = 1;
14250
14251 /* Special case: scaling cannot be encoded without base or displacement. */
14252 if (!base && !disp && index && scale != 1)
14253 disp = const0_rtx;
14254
14255 out->base = base;
14256 out->index = index;
14257 out->disp = disp;
14258 out->scale = scale;
14259 out->seg = seg;
14260
14261 return retval;
14262 }
14263 \f
14264 /* Return cost of the memory address x.
14265 For i386, it is better to use a complex address than let gcc copy
14266 the address into a reg and make a new pseudo. But not if the address
14267 requires two regs - that would mean more pseudos with longer
14268 lifetimes. */
14269 static int
14270 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14271 {
14272 struct ix86_address parts;
14273 int cost = 1;
14274 int ok = ix86_decompose_address (x, &parts);
14275
14276 gcc_assert (ok);
14277
14278 if (parts.base && SUBREG_P (parts.base))
14279 parts.base = SUBREG_REG (parts.base);
14280 if (parts.index && SUBREG_P (parts.index))
14281 parts.index = SUBREG_REG (parts.index);
14282
14283 /* Attempt to minimize the number of registers in the address by increasing
14284 the address cost for each register used. We don't increase the address cost
14285 for "pic_offset_table_rtx". When a memop using "pic_offset_table_rtx"
14286 is not invariant itself, it most likely means that the base or index is not
14287 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14288 which is not profitable for x86. */
14289 if (parts.base
14290 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14291 && (current_pass->type == GIMPLE_PASS
14292 || !pic_offset_table_rtx
14293 || !REG_P (parts.base)
14294 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
14295 cost++;
14296
14297 if (parts.index
14298 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14299 && (current_pass->type == GIMPLE_PASS
14300 || !pic_offset_table_rtx
14301 || !REG_P (parts.index)
14302 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14303 cost++;
14304
14305 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
14306 since its predecode logic can't detect the length of such instructions
14307 and they degenerate to vector decoding. Increase the cost of such
14308 addresses here. The penalty is at least 2 cycles. It may be worthwhile
14309 to split such addresses or even refuse them altogether.
14310
14311 The following addressing modes are affected:
14312 [base+scale*index]
14313 [scale*index+disp]
14314 [base+index]
14315
14316 The first and last case may be avoidable by explicitly coding the zero into
14317 the memory address, but I don't have an AMD-K6 machine handy to check this
14318 theory. */
14319
14320 if (TARGET_K6
14321 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14322 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14323 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14324 cost += 10;
14325
14326 return cost;
14327 }
14328 \f
14329 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
14330 this is used to form addresses to local data when -fPIC is in
14331 use. */
14332
14333 static bool
14334 darwin_local_data_pic (rtx disp)
14335 {
14336 return (GET_CODE (disp) == UNSPEC
14337 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14338 }
14339
14340 /* Determine if a given RTX is a valid constant. We already know this
14341 satisfies CONSTANT_P. */
14342
14343 static bool
14344 ix86_legitimate_constant_p (machine_mode, rtx x)
14345 {
14346 /* Pointer bounds constants are not valid. */
14347 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14348 return false;
14349
14350 switch (GET_CODE (x))
14351 {
14352 case CONST:
14353 x = XEXP (x, 0);
14354
14355 if (GET_CODE (x) == PLUS)
14356 {
14357 if (!CONST_INT_P (XEXP (x, 1)))
14358 return false;
14359 x = XEXP (x, 0);
14360 }
14361
14362 if (TARGET_MACHO && darwin_local_data_pic (x))
14363 return true;
14364
14365 /* Only some unspecs are valid as "constants". */
14366 if (GET_CODE (x) == UNSPEC)
14367 switch (XINT (x, 1))
14368 {
14369 case UNSPEC_GOT:
14370 case UNSPEC_GOTOFF:
14371 case UNSPEC_PLTOFF:
14372 return TARGET_64BIT;
14373 case UNSPEC_TPOFF:
14374 case UNSPEC_NTPOFF:
14375 x = XVECEXP (x, 0, 0);
14376 return (GET_CODE (x) == SYMBOL_REF
14377 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14378 case UNSPEC_DTPOFF:
14379 x = XVECEXP (x, 0, 0);
14380 return (GET_CODE (x) == SYMBOL_REF
14381 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14382 default:
14383 return false;
14384 }
14385
14386 /* We must have drilled down to a symbol. */
14387 if (GET_CODE (x) == LABEL_REF)
14388 return true;
14389 if (GET_CODE (x) != SYMBOL_REF)
14390 return false;
14391 /* FALLTHRU */
14392
14393 case SYMBOL_REF:
14394 /* TLS symbols are never valid. */
14395 if (SYMBOL_REF_TLS_MODEL (x))
14396 return false;
14397
14398 /* DLLIMPORT symbols are never valid. */
14399 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14400 && SYMBOL_REF_DLLIMPORT_P (x))
14401 return false;
14402
14403 #if TARGET_MACHO
14404 /* mdynamic-no-pic */
14405 if (MACHO_DYNAMIC_NO_PIC_P)
14406 return machopic_symbol_defined_p (x);
14407 #endif
14408 break;
14409
14410 case CONST_WIDE_INT:
14411 if (!TARGET_64BIT && !standard_sse_constant_p (x))
14412 return false;
14413 break;
14414
14415 case CONST_VECTOR:
14416 if (!standard_sse_constant_p (x))
14417 return false;
14418
14419 default:
14420 break;
14421 }
14422
14423 /* Otherwise we handle everything else in the move patterns. */
14424 return true;
14425 }
14426
14427 /* Determine if it's legal to put X into the constant pool. This
14428 is not possible for the address of thread-local symbols, which
14429 is checked above. */
14430
14431 static bool
14432 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14433 {
14434 /* We can always put integral constants and vectors in memory. */
14435 switch (GET_CODE (x))
14436 {
14437 case CONST_INT:
14438 case CONST_WIDE_INT:
14439 case CONST_DOUBLE:
14440 case CONST_VECTOR:
14441 return false;
14442
14443 default:
14444 break;
14445 }
14446 return !ix86_legitimate_constant_p (mode, x);
14447 }
14448
14449 /* Nonzero if the symbol is marked as dllimport, or as a stub-variable,
14450 otherwise zero. */
14451
14452 static bool
14453 is_imported_p (rtx x)
14454 {
14455 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
14456 || GET_CODE (x) != SYMBOL_REF)
14457 return false;
14458
14459 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
14460 }
14461
14462
14463 /* Nonzero if the constant value X is a legitimate general operand
14464 when generating PIC code. It is given that flag_pic is on and
14465 that X satisfies CONSTANT_P. */
14466
14467 bool
14468 legitimate_pic_operand_p (rtx x)
14469 {
14470 rtx inner;
14471
14472 switch (GET_CODE (x))
14473 {
14474 case CONST:
14475 inner = XEXP (x, 0);
14476 if (GET_CODE (inner) == PLUS
14477 && CONST_INT_P (XEXP (inner, 1)))
14478 inner = XEXP (inner, 0);
14479
14480 /* Only some unspecs are valid as "constants". */
14481 if (GET_CODE (inner) == UNSPEC)
14482 switch (XINT (inner, 1))
14483 {
14484 case UNSPEC_GOT:
14485 case UNSPEC_GOTOFF:
14486 case UNSPEC_PLTOFF:
14487 return TARGET_64BIT;
14488 case UNSPEC_TPOFF:
14489 x = XVECEXP (inner, 0, 0);
14490 return (GET_CODE (x) == SYMBOL_REF
14491 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14492 case UNSPEC_MACHOPIC_OFFSET:
14493 return legitimate_pic_address_disp_p (x);
14494 default:
14495 return false;
14496 }
14497 /* FALLTHRU */
14498
14499 case SYMBOL_REF:
14500 case LABEL_REF:
14501 return legitimate_pic_address_disp_p (x);
14502
14503 default:
14504 return true;
14505 }
14506 }
14507
14508 /* Determine if a given CONST RTX is a valid memory displacement
14509 in PIC mode. */
14510
14511 bool
14512 legitimate_pic_address_disp_p (rtx disp)
14513 {
14514 bool saw_plus;
14515
14516 /* In 64bit mode we can allow direct addresses of symbols and labels
14517 when they are not dynamic symbols. */
14518 if (TARGET_64BIT)
14519 {
14520 rtx op0 = disp, op1;
14521
14522 switch (GET_CODE (disp))
14523 {
14524 case LABEL_REF:
14525 return true;
14526
14527 case CONST:
14528 if (GET_CODE (XEXP (disp, 0)) != PLUS)
14529 break;
14530 op0 = XEXP (XEXP (disp, 0), 0);
14531 op1 = XEXP (XEXP (disp, 0), 1);
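/* The +/-16MB cap below is conservative; presumably it keeps symbol+offset
   comfortably within the signed 32-bit reach of the small code model. */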
14532 if (!CONST_INT_P (op1)
14533 || INTVAL (op1) >= 16*1024*1024
14534 || INTVAL (op1) < -16*1024*1024)
14535 break;
14536 if (GET_CODE (op0) == LABEL_REF)
14537 return true;
14538 if (GET_CODE (op0) == CONST
14539 && GET_CODE (XEXP (op0, 0)) == UNSPEC
14540 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
14541 return true;
14542 if (GET_CODE (op0) == UNSPEC
14543 && XINT (op0, 1) == UNSPEC_PCREL)
14544 return true;
14545 if (GET_CODE (op0) != SYMBOL_REF)
14546 break;
14547 /* FALLTHRU */
14548
14549 case SYMBOL_REF:
14550 /* TLS references should always be enclosed in UNSPEC.
14551 A dllimported symbol always needs to be resolved. */
14552 if (SYMBOL_REF_TLS_MODEL (op0)
14553 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
14554 return false;
14555
14556 if (TARGET_PECOFF)
14557 {
14558 if (is_imported_p (op0))
14559 return true;
14560
14561 if (SYMBOL_REF_FAR_ADDR_P (op0)
14562 || !SYMBOL_REF_LOCAL_P (op0))
14563 break;
14564
14565 /* Function symbols need to be resolved only for
14566 the large model.
14567 For the small model we don't need to resolve anything
14568 here. */
14569 if ((ix86_cmodel != CM_LARGE_PIC
14570 && SYMBOL_REF_FUNCTION_P (op0))
14571 || ix86_cmodel == CM_SMALL_PIC)
14572 return true;
14573 /* Non-external symbols don't need to be resolved for
14574 the large and medium models. */
14575 if ((ix86_cmodel == CM_LARGE_PIC
14576 || ix86_cmodel == CM_MEDIUM_PIC)
14577 && !SYMBOL_REF_EXTERNAL_P (op0))
14578 return true;
14579 }
14580 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
14581 && (SYMBOL_REF_LOCAL_P (op0)
14582 || (HAVE_LD_PIE_COPYRELOC
14583 && flag_pie
14584 && !SYMBOL_REF_WEAK (op0)
14585 && !SYMBOL_REF_FUNCTION_P (op0)))
14586 && ix86_cmodel != CM_LARGE_PIC)
14587 return true;
14588 break;
14589
14590 default:
14591 break;
14592 }
14593 }
14594 if (GET_CODE (disp) != CONST)
14595 return false;
14596 disp = XEXP (disp, 0);
14597
14598 if (TARGET_64BIT)
14599 {
14600 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
14601 of GOT tables. We should not need these anyway. */
14602 if (GET_CODE (disp) != UNSPEC
14603 || (XINT (disp, 1) != UNSPEC_GOTPCREL
14604 && XINT (disp, 1) != UNSPEC_GOTOFF
14605 && XINT (disp, 1) != UNSPEC_PCREL
14606 && XINT (disp, 1) != UNSPEC_PLTOFF))
14607 return false;
14608
14609 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
14610 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
14611 return false;
14612 return true;
14613 }
14614
14615 saw_plus = false;
14616 if (GET_CODE (disp) == PLUS)
14617 {
14618 if (!CONST_INT_P (XEXP (disp, 1)))
14619 return false;
14620 disp = XEXP (disp, 0);
14621 saw_plus = true;
14622 }
14623
14624 if (TARGET_MACHO && darwin_local_data_pic (disp))
14625 return true;
14626
14627 if (GET_CODE (disp) != UNSPEC)
14628 return false;
14629
14630 switch (XINT (disp, 1))
14631 {
14632 case UNSPEC_GOT:
14633 if (saw_plus)
14634 return false;
14635 /* We need to check for both symbols and labels because VxWorks loads
14636 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
14637 details. */
14638 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14639 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
14640 case UNSPEC_GOTOFF:
14641 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
14642 While the ABI also specifies a 32bit relocation, we don't produce it in
14643 the small PIC model at all. */
14644 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14645 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
14646 && !TARGET_64BIT)
14647 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
14648 return false;
14649 case UNSPEC_GOTTPOFF:
14650 case UNSPEC_GOTNTPOFF:
14651 case UNSPEC_INDNTPOFF:
14652 if (saw_plus)
14653 return false;
14654 disp = XVECEXP (disp, 0, 0);
14655 return (GET_CODE (disp) == SYMBOL_REF
14656 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
14657 case UNSPEC_NTPOFF:
14658 disp = XVECEXP (disp, 0, 0);
14659 return (GET_CODE (disp) == SYMBOL_REF
14660 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
14661 case UNSPEC_DTPOFF:
14662 disp = XVECEXP (disp, 0, 0);
14663 return (GET_CODE (disp) == SYMBOL_REF
14664 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
14665 }
14666
14667 return false;
14668 }
14669
14670 /* Determine if op is a suitable RTX for an address register.
14671 Return the naked register if a register or a register subreg is
14672 found, otherwise return NULL_RTX. */
14673
14674 static rtx
14675 ix86_validate_address_register (rtx op)
14676 {
14677 machine_mode mode = GET_MODE (op);
14678
14679 /* Only SImode or DImode registers can form the address. */
14680 if (mode != SImode && mode != DImode)
14681 return NULL_RTX;
14682
14683 if (REG_P (op))
14684 return op;
14685 else if (SUBREG_P (op))
14686 {
14687 rtx reg = SUBREG_REG (op);
14688
14689 if (!REG_P (reg))
14690 return NULL_RTX;
14691
14692 mode = GET_MODE (reg);
14693
14694 /* Don't allow SUBREGs that span more than a word. It can
14695 lead to spill failures when the register is one word out
14696 of a two word structure. */
14697 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14698 return NULL_RTX;
14699
14700 /* Allow only SUBREGs of non-eliminable hard registers. */
14701 if (register_no_elim_operand (reg, mode))
14702 return reg;
14703 }
14704
14705 /* Op is not a register. */
14706 return NULL_RTX;
14707 }
14708
14709 /* Recognizes RTL expressions that are valid memory addresses for an
14710 instruction. The MODE argument is the machine mode for the MEM
14711 expression that wants to use this address.
14712
14713 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
14714 convert common non-canonical forms to canonical form so that they will
14715 be recognized. */
14716
14717 static bool
14718 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
14719 {
14720 struct ix86_address parts;
14721 rtx base, index, disp;
14722 HOST_WIDE_INT scale;
14723 addr_space_t seg;
14724
14725 if (ix86_decompose_address (addr, &parts) <= 0)
14726 /* Decomposition failed. */
14727 return false;
14728
14729 base = parts.base;
14730 index = parts.index;
14731 disp = parts.disp;
14732 scale = parts.scale;
14733 seg = parts.seg;
14734
14735 /* Validate base register. */
14736 if (base)
14737 {
14738 rtx reg = ix86_validate_address_register (base);
14739
14740 if (reg == NULL_RTX)
14741 return false;
14742
14743 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
14744 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
14745 /* Base is not valid. */
14746 return false;
14747 }
14748
14749 /* Validate index register. */
14750 if (index)
14751 {
14752 rtx reg = ix86_validate_address_register (index);
14753
14754 if (reg == NULL_RTX)
14755 return false;
14756
14757 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
14758 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
14759 /* Index is not valid. */
14760 return false;
14761 }
14762
14763 /* Index and base should have the same mode. */
14764 if (base && index
14765 && GET_MODE (base) != GET_MODE (index))
14766 return false;
14767
14768 /* Address override works only on the (%reg) part of %fs:(%reg). */
14769 if (seg != ADDR_SPACE_GENERIC
14770 && ((base && GET_MODE (base) != word_mode)
14771 || (index && GET_MODE (index) != word_mode)))
14772 return false;
14773
14774 /* Validate scale factor. */
14775 if (scale != 1)
14776 {
14777 if (!index)
14778 /* Scale without index. */
14779 return false;
14780
14781 if (scale != 2 && scale != 4 && scale != 8)
14782 /* Scale is not a valid multiplier. */
14783 return false;
14784 }
14785
14786 /* Validate displacement. */
14787 if (disp)
14788 {
14789 if (GET_CODE (disp) == CONST
14790 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14791 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
14792 switch (XINT (XEXP (disp, 0), 1))
14793 {
14794 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
14795 used. While the ABI also specifies 32bit relocations, we don't produce
14796 them at all and use IP-relative addressing instead.
14797 case UNSPEC_GOT:
14798 case UNSPEC_GOTOFF:
14799 gcc_assert (flag_pic);
14800 if (!TARGET_64BIT)
14801 goto is_legitimate_pic;
14802
14803 /* 64bit address unspec. */
14804 return false;
14805
14806 case UNSPEC_GOTPCREL:
14807 case UNSPEC_PCREL:
14808 gcc_assert (flag_pic);
14809 goto is_legitimate_pic;
14810
14811 case UNSPEC_GOTTPOFF:
14812 case UNSPEC_GOTNTPOFF:
14813 case UNSPEC_INDNTPOFF:
14814 case UNSPEC_NTPOFF:
14815 case UNSPEC_DTPOFF:
14816 break;
14817
14818 case UNSPEC_STACK_CHECK:
14819 gcc_assert (flag_split_stack);
14820 break;
14821
14822 default:
14823 /* Invalid address unspec. */
14824 return false;
14825 }
14826
14827 else if (SYMBOLIC_CONST (disp)
14828 && (flag_pic
14829 || (TARGET_MACHO
14830 #if TARGET_MACHO
14831 && MACHOPIC_INDIRECT
14832 && !machopic_operand_p (disp)
14833 #endif
14834 )))
14835 {
14836
14837 is_legitimate_pic:
14838 if (TARGET_64BIT && (index || base))
14839 {
14840 /* foo@dtpoff(%rX) is ok. */
14841 if (GET_CODE (disp) != CONST
14842 || GET_CODE (XEXP (disp, 0)) != PLUS
14843 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
14844 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
14845 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
14846 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
14847 /* Non-constant pic memory reference. */
14848 return false;
14849 }
14850 else if ((!TARGET_MACHO || flag_pic)
14851 && ! legitimate_pic_address_disp_p (disp))
14852 /* Displacement is an invalid pic construct. */
14853 return false;
14854 #if TARGET_MACHO
14855 else if (MACHO_DYNAMIC_NO_PIC_P
14856 && !ix86_legitimate_constant_p (Pmode, disp))
14857 /* displacement must be referenced via non_lazy_pointer */
14858 return false;
14859 #endif
14860
14861 /* This code used to verify that a symbolic pic displacement
14862 includes the pic_offset_table_rtx register.
14863
14864 While this is a good idea, unfortunately these constructs may
14865 be created by "adds using lea" optimization for incorrect
14866 code like:
14867
14868 int a;
14869 int foo(int i)
14870 {
14871 return *(&a+i);
14872 }
14873
14874 This code is nonsensical, but results in addressing the
14875 GOT table with pic_offset_table_rtx as the base. We can't
14876 just refuse it easily, since it gets matched by the
14877 "addsi3" pattern, which later gets split to lea in case
14878 the output register differs from the input. While this
14879 could be handled by a separate addsi pattern for this case
14880 that never results in lea, disabling this test seems to be
14881 the easier and correct fix for the crash. */
14882 }
14883 else if (GET_CODE (disp) != LABEL_REF
14884 && !CONST_INT_P (disp)
14885 && (GET_CODE (disp) != CONST
14886 || !ix86_legitimate_constant_p (Pmode, disp))
14887 && (GET_CODE (disp) != SYMBOL_REF
14888 || !ix86_legitimate_constant_p (Pmode, disp)))
14889 /* Displacement is not constant. */
14890 return false;
14891 else if (TARGET_64BIT
14892 && !x86_64_immediate_operand (disp, VOIDmode))
14893 /* Displacement is out of range. */
14894 return false;
14895 /* In x32 mode, constant addresses are sign extended to 64bit, so
14896 we have to prevent addresses from 0x80000000 to 0xffffffff. */
14897 else if (TARGET_X32 && !(index || base)
14898 && CONST_INT_P (disp)
14899 && val_signbit_known_set_p (SImode, INTVAL (disp)))
14900 return false;
14901 }
14902
14903 /* Everything looks valid. */
14904 return true;
14905 }
14906
14907 /* Determine if a given RTX is a valid constant address. */
14908
14909 bool
14910 constant_address_p (rtx x)
14911 {
14912 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
14913 }
14914 \f
14915 /* Return a unique alias set for the GOT. */
14916
14917 static alias_set_type
14918 ix86_GOT_alias_set (void)
14919 {
14920 static alias_set_type set = -1;
14921 if (set == -1)
14922 set = new_alias_set ();
14923 return set;
14924 }
14925
14926 /* Return a legitimate reference for ORIG (an address) using the
14927 register REG. If REG is 0, a new pseudo is generated.
14928
14929 There are two types of references that must be handled:
14930
14931 1. Global data references must load the address from the GOT, via
14932 the PIC reg. An insn is emitted to do this load, and the reg is
14933 returned.
14934
14935 2. Static data references, constant pool addresses, and code labels
14936 compute the address as an offset from the GOT, whose base is in
14937 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
14938 differentiate them from global data objects. The returned
14939 address is the PIC reg + an unspec constant.
14940
14941 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
14942 reg also appears in the address. */
14943
14944 static rtx
14945 legitimize_pic_address (rtx orig, rtx reg)
14946 {
14947 rtx addr = orig;
14948 rtx new_rtx = orig;
14949
14950 #if TARGET_MACHO
14951 if (TARGET_MACHO && !TARGET_64BIT)
14952 {
14953 if (reg == 0)
14954 reg = gen_reg_rtx (Pmode);
14955 /* Use the generic Mach-O PIC machinery. */
14956 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
14957 }
14958 #endif
14959
14960 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14961 {
14962 rtx tmp = legitimize_pe_coff_symbol (addr, true);
14963 if (tmp)
14964 return tmp;
14965 }
14966
14967 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
14968 new_rtx = addr;
14969 else if (TARGET_64BIT && !TARGET_PECOFF
14970 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
14971 {
14972 rtx tmpreg;
14973 /* This symbol may be referenced via a displacement from the PIC
14974 base address (@GOTOFF). */
14975
14976 if (GET_CODE (addr) == CONST)
14977 addr = XEXP (addr, 0);
14978 if (GET_CODE (addr) == PLUS)
14979 {
14980 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
14981 UNSPEC_GOTOFF);
14982 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
14983 }
14984 else
14985 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14986 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
14987 if (!reg)
14988 tmpreg = gen_reg_rtx (Pmode);
14989 else
14990 tmpreg = reg;
14991 emit_move_insn (tmpreg, new_rtx);
14992
14993 if (reg != 0)
14994 {
14995 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
14996 tmpreg, 1, OPTAB_DIRECT);
14997 new_rtx = reg;
14998 }
14999 else
15000 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
15001 }
15002 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
15003 {
15004 /* This symbol may be referenced via a displacement from the PIC
15005 base address (@GOTOFF). */
15006
15007 if (GET_CODE (addr) == CONST)
15008 addr = XEXP (addr, 0);
15009 if (GET_CODE (addr) == PLUS)
15010 {
15011 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15012 UNSPEC_GOTOFF);
15013 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15014 }
15015 else
15016 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15017 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15018 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15019
15020 if (reg != 0)
15021 {
15022 emit_move_insn (reg, new_rtx);
15023 new_rtx = reg;
15024 }
15025 }
15026 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15027 /* We can't use @GOTOFF for text labels on VxWorks;
15028 see gotoff_operand. */
15029 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15030 {
15031 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15032 if (tmp)
15033 return tmp;
15034
15035 /* For x64 PE-COFF there is no GOT table, so we use the address
15036 directly. */
15037 if (TARGET_64BIT && TARGET_PECOFF)
15038 {
15039 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15040 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15041
15042 if (reg == 0)
15043 reg = gen_reg_rtx (Pmode);
15044 emit_move_insn (reg, new_rtx);
15045 new_rtx = reg;
15046 }
15047 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15048 {
15049 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
15050 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15051 new_rtx = gen_const_mem (Pmode, new_rtx);
15052 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15053
15054 if (reg == 0)
15055 reg = gen_reg_rtx (Pmode);
15056 /* Use gen_movsi directly, otherwise the address is loaded
15057 into a register for CSE. We don't want to CSE these addresses;
15058 instead we CSE addresses from the GOT table, so skip this. */
15059 emit_insn (gen_movsi (reg, new_rtx));
15060 new_rtx = reg;
15061 }
15062 else
15063 {
15064 /* This symbol must be referenced via a load from the
15065 Global Offset Table (@GOT). */
15066
15067 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15068 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15069 if (TARGET_64BIT)
15070 new_rtx = force_reg (Pmode, new_rtx);
15071 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15072 new_rtx = gen_const_mem (Pmode, new_rtx);
15073 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15074
15075 if (reg == 0)
15076 reg = gen_reg_rtx (Pmode);
15077 emit_move_insn (reg, new_rtx);
15078 new_rtx = reg;
15079 }
15080 }
15081 else
15082 {
15083 if (CONST_INT_P (addr)
15084 && !x86_64_immediate_operand (addr, VOIDmode))
15085 {
15086 if (reg)
15087 {
15088 emit_move_insn (reg, addr);
15089 new_rtx = reg;
15090 }
15091 else
15092 new_rtx = force_reg (Pmode, addr);
15093 }
15094 else if (GET_CODE (addr) == CONST)
15095 {
15096 addr = XEXP (addr, 0);
15097
15098 /* We must match stuff we generated before. Assume the only
15099 unspecs that can get here are ours. Not that we could do
15100 anything with them anyway.... */
15101 if (GET_CODE (addr) == UNSPEC
15102 || (GET_CODE (addr) == PLUS
15103 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15104 return orig;
15105 gcc_assert (GET_CODE (addr) == PLUS);
15106 }
15107 if (GET_CODE (addr) == PLUS)
15108 {
15109 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15110
15111 /* Check first to see if this is a constant offset from a @GOTOFF
15112 symbol reference. */
15113 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
15114 && CONST_INT_P (op1))
15115 {
15116 if (!TARGET_64BIT)
15117 {
15118 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15119 UNSPEC_GOTOFF);
15120 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15121 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15122 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15123
15124 if (reg != 0)
15125 {
15126 emit_move_insn (reg, new_rtx);
15127 new_rtx = reg;
15128 }
15129 }
15130 else
15131 {
15132 if (INTVAL (op1) < -16*1024*1024
15133 || INTVAL (op1) >= 16*1024*1024)
15134 {
15135 if (!x86_64_immediate_operand (op1, Pmode))
15136 op1 = force_reg (Pmode, op1);
15137 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
15138 }
15139 }
15140 }
15141 else
15142 {
15143 rtx base = legitimize_pic_address (op0, reg);
15144 machine_mode mode = GET_MODE (base);
15145 new_rtx
15146 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15147
15148 if (CONST_INT_P (new_rtx))
15149 {
15150 if (INTVAL (new_rtx) < -16*1024*1024
15151 || INTVAL (new_rtx) >= 16*1024*1024)
15152 {
15153 if (!x86_64_immediate_operand (new_rtx, mode))
15154 new_rtx = force_reg (mode, new_rtx);
15155 new_rtx
15156 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15157 }
15158 else
15159 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15160 }
15161 else
15162 {
15163 /* For %rip addressing, we have to use just disp32, with
15164 neither base nor index. */
15165 if (TARGET_64BIT
15166 && (GET_CODE (base) == SYMBOL_REF
15167 || GET_CODE (base) == LABEL_REF))
15168 base = force_reg (mode, base);
15169 if (GET_CODE (new_rtx) == PLUS
15170 && CONSTANT_P (XEXP (new_rtx, 1)))
15171 {
15172 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15173 new_rtx = XEXP (new_rtx, 1);
15174 }
15175 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15176 }
15177 }
15178 }
15179 }
15180 return new_rtx;
15181 }
15182 \f
15183 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15184
15185 static rtx
15186 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15187 {
15188 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
15189
15190 if (GET_MODE (tp) != tp_mode)
15191 {
15192 gcc_assert (GET_MODE (tp) == SImode);
15193 gcc_assert (tp_mode == DImode);
15194
15195 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15196 }
15197
15198 if (to_reg)
15199 tp = copy_to_mode_reg (tp_mode, tp);
15200
15201 return tp;
15202 }
15203
15204 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15205
15206 static GTY(()) rtx ix86_tls_symbol;
15207
15208 static rtx
15209 ix86_tls_get_addr (void)
15210 {
15211 if (!ix86_tls_symbol)
15212 {
15213 const char *sym
15214 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15215 ? "___tls_get_addr" : "__tls_get_addr");
15216
15217 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
15218 }
15219
15220 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15221 {
15222 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15223 UNSPEC_PLTOFF);
15224 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15225 gen_rtx_CONST (Pmode, unspec));
15226 }
15227
15228 return ix86_tls_symbol;
15229 }
15230
15231 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15232
15233 static GTY(()) rtx ix86_tls_module_base_symbol;
15234
15235 rtx
15236 ix86_tls_module_base (void)
15237 {
15238 if (!ix86_tls_module_base_symbol)
15239 {
15240 ix86_tls_module_base_symbol
15241 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
15242
15243 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15244 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15245 }
15246
15247 return ix86_tls_module_base_symbol;
15248 }
15249
15250 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15251 false if we expect this to be used for a memory address and true if
15252 we expect to load the address into a register. */
15253
15254 static rtx
15255 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15256 {
15257 rtx dest, base, off;
15258 rtx pic = NULL_RTX, tp = NULL_RTX;
15259 machine_mode tp_mode = Pmode;
15260 int type;
15261
15262 /* Fall back to the global dynamic model if the tool chain cannot support
15263 local dynamic. */
15264 if (TARGET_SUN_TLS && !TARGET_64BIT
15265 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15266 && model == TLS_MODEL_LOCAL_DYNAMIC)
15267 model = TLS_MODEL_GLOBAL_DYNAMIC;
15268
15269 switch (model)
15270 {
15271 case TLS_MODEL_GLOBAL_DYNAMIC:
15272 dest = gen_reg_rtx (Pmode);
15273
15274 if (!TARGET_64BIT)
15275 {
15276 if (flag_pic && !TARGET_PECOFF)
15277 pic = pic_offset_table_rtx;
15278 else
15279 {
15280 pic = gen_reg_rtx (Pmode);
15281 emit_insn (gen_set_got (pic));
15282 }
15283 }
15284
15285 if (TARGET_GNU2_TLS)
15286 {
15287 if (TARGET_64BIT)
15288 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15289 else
15290 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15291
15292 tp = get_thread_pointer (Pmode, true);
15293 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15294
15295 if (GET_MODE (x) != Pmode)
15296 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15297
15298 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15299 }
15300 else
15301 {
15302 rtx caddr = ix86_tls_get_addr ();
15303
15304 if (TARGET_64BIT)
15305 {
15306 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15307 rtx_insn *insns;
15308
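/* Collect the __tls_get_addr call into a sequence so that
   emit_libcall_block can attach an equivalence note, allowing later
   passes to CSE identical TLS address computations. */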
15309 start_sequence ();
15310 emit_call_insn
15311 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15312 insns = get_insns ();
15313 end_sequence ();
15314
15315 if (GET_MODE (x) != Pmode)
15316 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15317
15318 RTL_CONST_CALL_P (insns) = 1;
15319 emit_libcall_block (insns, dest, rax, x);
15320 }
15321 else
15322 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15323 }
15324 break;
15325
15326 case TLS_MODEL_LOCAL_DYNAMIC:
15327 base = gen_reg_rtx (Pmode);
15328
15329 if (!TARGET_64BIT)
15330 {
15331 if (flag_pic)
15332 pic = pic_offset_table_rtx;
15333 else
15334 {
15335 pic = gen_reg_rtx (Pmode);
15336 emit_insn (gen_set_got (pic));
15337 }
15338 }
15339
15340 if (TARGET_GNU2_TLS)
15341 {
15342 rtx tmp = ix86_tls_module_base ();
15343
15344 if (TARGET_64BIT)
15345 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15346 else
15347 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15348
15349 tp = get_thread_pointer (Pmode, true);
15350 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15351 gen_rtx_MINUS (Pmode, tmp, tp));
15352 }
15353 else
15354 {
15355 rtx caddr = ix86_tls_get_addr ();
15356
15357 if (TARGET_64BIT)
15358 {
15359 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15360 rtx_insn *insns;
15361 rtx eqv;
15362
15363 start_sequence ();
15364 emit_call_insn
15365 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15366 insns = get_insns ();
15367 end_sequence ();
15368
15369 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15370 share the LD_BASE result with other LD model accesses. */
15371 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15372 UNSPEC_TLS_LD_BASE);
15373
15374 RTL_CONST_CALL_P (insns) = 1;
15375 emit_libcall_block (insns, base, rax, eqv);
15376 }
15377 else
15378 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
15379 }
15380
15381 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15382 off = gen_rtx_CONST (Pmode, off);
15383
15384 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15385
15386 if (TARGET_GNU2_TLS)
15387 {
15388 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15389
15390 if (GET_MODE (x) != Pmode)
15391 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15392
15393 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15394 }
15395 break;
15396
15397 case TLS_MODEL_INITIAL_EXEC:
15398 if (TARGET_64BIT)
15399 {
15400 if (TARGET_SUN_TLS && !TARGET_X32)
15401 {
15402 /* The Sun linker took the AMD64 TLS spec literally
15403 and can only handle %rax as the destination of the
15404 initial-exec code sequence. */
15405
15406 dest = gen_reg_rtx (DImode);
15407 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15408 return dest;
15409 }
15410
15411 /* Generate DImode references to avoid %fs:(%reg32)
15412 problems and the linker IE->LE relaxation bug. */
15413 tp_mode = DImode;
15414 pic = NULL;
15415 type = UNSPEC_GOTNTPOFF;
15416 }
15417 else if (flag_pic)
15418 {
15419 pic = pic_offset_table_rtx;
15420 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15421 }
15422 else if (!TARGET_ANY_GNU_TLS)
15423 {
15424 pic = gen_reg_rtx (Pmode);
15425 emit_insn (gen_set_got (pic));
15426 type = UNSPEC_GOTTPOFF;
15427 }
15428 else
15429 {
15430 pic = NULL;
15431 type = UNSPEC_INDNTPOFF;
15432 }
15433
15434 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15435 off = gen_rtx_CONST (tp_mode, off);
15436 if (pic)
15437 off = gen_rtx_PLUS (tp_mode, pic, off);
15438 off = gen_const_mem (tp_mode, off);
15439 set_mem_alias_set (off, ix86_GOT_alias_set ());
15440
15441 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15442 {
15443 base = get_thread_pointer (tp_mode,
15444 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15445 off = force_reg (tp_mode, off);
15446 return gen_rtx_PLUS (tp_mode, base, off);
15447 }
15448 else
15449 {
15450 base = get_thread_pointer (Pmode, true);
15451 dest = gen_reg_rtx (Pmode);
15452 emit_insn (ix86_gen_sub3 (dest, base, off));
15453 }
15454 break;
15455
15456 case TLS_MODEL_LOCAL_EXEC:
15457 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15458 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15459 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15460 off = gen_rtx_CONST (Pmode, off);
15461
15462 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15463 {
15464 base = get_thread_pointer (Pmode,
15465 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15466 return gen_rtx_PLUS (Pmode, base, off);
15467 }
15468 else
15469 {
15470 base = get_thread_pointer (Pmode, true);
15471 dest = gen_reg_rtx (Pmode);
15472 emit_insn (ix86_gen_sub3 (dest, base, off));
15473 }
15474 break;
15475
15476 default:
15477 gcc_unreachable ();
15478 }
15479
15480 return dest;
15481 }
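/* A rough sketch of the access sequences the cases above generate on
   x86-64 (linker-relaxation padding omitted; the TARGET_GNU2_TLS
   descriptor-based variants differ):

     global dynamic:  leaq x@tlsgd(%rip), %rdi
                      call __tls_get_addr@plt      # result in %rax
     local dynamic:   leaq x@tlsld(%rip), %rdi
                      call __tls_get_addr@plt
                      leaq x@dtpoff(%rax), %rcx
     initial exec:    movq x@gottpoff(%rip), %rax
                      movq %fs:(%rax), %rcx
     local exec:      movq %fs:0, %rax
                      movq x@tpoff(%rax), %rcx

   The 32-bit sequences are analogous but use %ebx as the GOT pointer
   and %gs as the TLS segment.  */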
15482
15483 /* Create or return the unique __imp_DECL dllimport symbol corresponding
15484 to symbol DECL if BEIMPORT is true. Otherwise create or return the
15485 unique refptr-DECL symbol corresponding to symbol DECL. */
15486
15487 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
15488 {
15489 static inline hashval_t hash (tree_map *m) { return m->hash; }
15490 static inline bool
15491 equal (tree_map *a, tree_map *b)
15492 {
15493 return a->base.from == b->base.from;
15494 }
15495
15496 static int
15497 keep_cache_entry (tree_map *&m)
15498 {
15499 return ggc_marked_p (m->base.from);
15500 }
15501 };
15502
15503 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
15504
15505 static tree
15506 get_dllimport_decl (tree decl, bool beimport)
15507 {
15508 struct tree_map *h, in;
15509 const char *name;
15510 const char *prefix;
15511 size_t namelen, prefixlen;
15512 char *imp_name;
15513 tree to;
15514 rtx rtl;
15515
15516 if (!dllimport_map)
15517 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
15518
15519 in.hash = htab_hash_pointer (decl);
15520 in.base.from = decl;
15521 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
15522 h = *loc;
15523 if (h)
15524 return h->to;
15525
15526 *loc = h = ggc_alloc<tree_map> ();
15527 h->hash = in.hash;
15528 h->base.from = decl;
15529 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
15530 VAR_DECL, NULL, ptr_type_node);
15531 DECL_ARTIFICIAL (to) = 1;
15532 DECL_IGNORED_P (to) = 1;
15533 DECL_EXTERNAL (to) = 1;
15534 TREE_READONLY (to) = 1;
15535
15536 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
15537 name = targetm.strip_name_encoding (name);
15538 if (beimport)
15539 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
15540 ? "*__imp_" : "*__imp__";
15541 else
15542 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
15543 namelen = strlen (name);
15544 prefixlen = strlen (prefix);
15545 imp_name = (char *) alloca (namelen + prefixlen + 1);
15546 memcpy (imp_name, prefix, prefixlen);
15547 memcpy (imp_name + prefixlen, name, namelen + 1);
15548
15549 name = ggc_alloc_string (imp_name, namelen + prefixlen);
15550 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
15551 SET_SYMBOL_REF_DECL (rtl, to);
15552 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
15553 if (!beimport)
15554 {
15555 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
15556 #ifdef SUB_TARGET_RECORD_STUB
15557 SUB_TARGET_RECORD_STUB (name);
15558 #endif
15559 }
15560
15561 rtl = gen_const_mem (Pmode, rtl);
15562 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
15563
15564 SET_DECL_RTL (to, rtl);
15565 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
15566
15567 return to;
15568 }
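/* In other words, as a simplified illustration: a reference to a
   dllimport'ed "foo" is redirected through the import-table slot the
   linker fills in, roughly

     movl  __imp__foo, %eax      # address of foo
     movl  (%eax), %edx          # value of foo

   while the refptr variant (used for the medium/large code models) goes
   through a locally emitted ".refptr.foo" pointer instead.  */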
15569
15570 /* Expand SYMBOL into its corresponding far-address symbol.
15571 WANT_REG is true if we require the result be a register. */
15572
15573 static rtx
15574 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
15575 {
15576 tree imp_decl;
15577 rtx x;
15578
15579 gcc_assert (SYMBOL_REF_DECL (symbol));
15580 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
15581
15582 x = DECL_RTL (imp_decl);
15583 if (want_reg)
15584 x = force_reg (Pmode, x);
15585 return x;
15586 }
15587
15588 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
15589 true if we require the result be a register. */
15590
15591 static rtx
15592 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
15593 {
15594 tree imp_decl;
15595 rtx x;
15596
15597 gcc_assert (SYMBOL_REF_DECL (symbol));
15598 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
15599
15600 x = DECL_RTL (imp_decl);
15601 if (want_reg)
15602 x = force_reg (Pmode, x);
15603 return x;
15604 }
15605
15606 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
15607 is true if we require the result be a register. */
15608
15609 static rtx
15610 legitimize_pe_coff_symbol (rtx addr, bool inreg)
15611 {
15612 if (!TARGET_PECOFF)
15613 return NULL_RTX;
15614
15615 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15616 {
15617 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
15618 return legitimize_dllimport_symbol (addr, inreg);
15619 if (GET_CODE (addr) == CONST
15620 && GET_CODE (XEXP (addr, 0)) == PLUS
15621 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15622 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
15623 {
15624 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
15625 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15626 }
15627 }
15628
15629 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
15630 return NULL_RTX;
15631 if (GET_CODE (addr) == SYMBOL_REF
15632 && !is_imported_p (addr)
15633 && SYMBOL_REF_EXTERNAL_P (addr)
15634 && SYMBOL_REF_DECL (addr))
15635 return legitimize_pe_coff_extern_decl (addr, inreg);
15636
15637 if (GET_CODE (addr) == CONST
15638 && GET_CODE (XEXP (addr, 0)) == PLUS
15639 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15640 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
15641 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
15642 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
15643 {
15644 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
15645 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15646 }
15647 return NULL_RTX;
15648 }
15649
15650 /* Try machine-dependent ways of modifying an illegitimate address
15651 to be legitimate. If we find one, return the new, valid address.
15652 This hook is used in only one place: `memory_address' in explow.c.
15653
15654 OLDX is the address as it was before break_out_memory_refs was called.
15655 In some cases it is useful to look at this to decide what needs to be done.
15656
15657 It is always safe for this hook to do nothing. It exists to recognize
15658 opportunities to optimize the output.
15659
15660 For the 80386, we handle X+REG by loading X into a register R and
15661 using R+REG. R will go in a general reg and indexing will be used.
15662 However, if REG is a broken-out memory address or multiplication,
15663 nothing needs to be done because REG can certainly go in a general reg.
15664
15665 When -fpic is used, special handling is needed for symbolic references.
15666 See comments by legitimize_pic_address in i386.c for details. */
15667
15668 static rtx
15669 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
15670 {
15671 bool changed = false;
15672 unsigned log;
15673
15674 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
15675 if (log)
15676 return legitimize_tls_address (x, (enum tls_model) log, false);
15677 if (GET_CODE (x) == CONST
15678 && GET_CODE (XEXP (x, 0)) == PLUS
15679 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
15680 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
15681 {
15682 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
15683 (enum tls_model) log, false);
15684 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
15685 }
15686
15687 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15688 {
15689 rtx tmp = legitimize_pe_coff_symbol (x, true);
15690 if (tmp)
15691 return tmp;
15692 }
15693
15694 if (flag_pic && SYMBOLIC_CONST (x))
15695 return legitimize_pic_address (x, 0);
15696
15697 #if TARGET_MACHO
15698 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
15699 return machopic_indirect_data_reference (x, 0);
15700 #endif
15701
15702 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15703 if (GET_CODE (x) == ASHIFT
15704 && CONST_INT_P (XEXP (x, 1))
15705 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
15706 {
15707 changed = true;
15708 log = INTVAL (XEXP (x, 1));
15709 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
15710 GEN_INT (1 << log));
15711 }
15712
15713 if (GET_CODE (x) == PLUS)
15714 {
15715 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15716
15717 if (GET_CODE (XEXP (x, 0)) == ASHIFT
15718 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15719 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
15720 {
15721 changed = true;
15722 log = INTVAL (XEXP (XEXP (x, 0), 1));
15723 XEXP (x, 0) = gen_rtx_MULT (Pmode,
15724 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
15725 GEN_INT (1 << log));
15726 }
15727
15728 if (GET_CODE (XEXP (x, 1)) == ASHIFT
15729 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
15730 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
15731 {
15732 changed = true;
15733 log = INTVAL (XEXP (XEXP (x, 1), 1));
15734 XEXP (x, 1) = gen_rtx_MULT (Pmode,
15735 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
15736 GEN_INT (1 << log));
15737 }
15738
15739 /* Put multiply first if it isn't already. */
15740 if (GET_CODE (XEXP (x, 1)) == MULT)
15741 {
15742 std::swap (XEXP (x, 0), XEXP (x, 1));
15743 changed = true;
15744 }
15745
15746 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
15747 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
15748 created by virtual register instantiation, register elimination, and
15749 similar optimizations. */
15750 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
15751 {
15752 changed = true;
15753 x = gen_rtx_PLUS (Pmode,
15754 gen_rtx_PLUS (Pmode, XEXP (x, 0),
15755 XEXP (XEXP (x, 1), 0)),
15756 XEXP (XEXP (x, 1), 1));
15757 }
15758
15759 /* Canonicalize
15760 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
15761 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
15762 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
15763 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15764 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
15765 && CONSTANT_P (XEXP (x, 1)))
15766 {
15767 rtx constant;
15768 rtx other = NULL_RTX;
15769
15770 if (CONST_INT_P (XEXP (x, 1)))
15771 {
15772 constant = XEXP (x, 1);
15773 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
15774 }
15775 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
15776 {
15777 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
15778 other = XEXP (x, 1);
15779 }
15780 else
15781 constant = 0;
15782
15783 if (constant)
15784 {
15785 changed = true;
15786 x = gen_rtx_PLUS (Pmode,
15787 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
15788 XEXP (XEXP (XEXP (x, 0), 1), 0)),
15789 plus_constant (Pmode, other,
15790 INTVAL (constant)));
15791 }
15792 }
15793
15794 if (changed && ix86_legitimate_address_p (mode, x, false))
15795 return x;
15796
15797 if (GET_CODE (XEXP (x, 0)) == MULT)
15798 {
15799 changed = true;
15800 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
15801 }
15802
15803 if (GET_CODE (XEXP (x, 1)) == MULT)
15804 {
15805 changed = true;
15806 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
15807 }
15808
15809 if (changed
15810 && REG_P (XEXP (x, 1))
15811 && REG_P (XEXP (x, 0)))
15812 return x;
15813
15814 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
15815 {
15816 changed = true;
15817 x = legitimize_pic_address (x, 0);
15818 }
15819
15820 if (changed && ix86_legitimate_address_p (mode, x, false))
15821 return x;
15822
15823 if (REG_P (XEXP (x, 0)))
15824 {
15825 rtx temp = gen_reg_rtx (Pmode);
15826 rtx val = force_operand (XEXP (x, 1), temp);
15827 if (val != temp)
15828 {
15829 val = convert_to_mode (Pmode, val, 1);
15830 emit_move_insn (temp, val);
15831 }
15832
15833 XEXP (x, 1) = temp;
15834 return x;
15835 }
15836
15837 else if (REG_P (XEXP (x, 1)))
15838 {
15839 rtx temp = gen_reg_rtx (Pmode);
15840 rtx val = force_operand (XEXP (x, 0), temp);
15841 if (val != temp)
15842 {
15843 val = convert_to_mode (Pmode, val, 1);
15844 emit_move_insn (temp, val);
15845 }
15846
15847 XEXP (x, 0) = temp;
15848 return x;
15849 }
15850 }
15851
15852 return x;
15853 }
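/* For example, given the (illegitimate) address

     (plus (ashift (reg A) (const_int 2)) (plus (reg B) (const_int 8)))

   the code above first rewrites the shift as (mult (reg A) (const_int 4))
   and then reassociates the whole expression into

     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   which fits the base + index*scale + displacement addressing forms.  */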
15854 \f
15855 /* Print an integer constant expression in assembler syntax. Addition
15856 and subtraction are the only arithmetic that may appear in these
15857 expressions. FILE is the stdio stream to write to, X is the rtx, and
15858 CODE is the operand print code from the output string. */
15859
15860 static void
15861 output_pic_addr_const (FILE *file, rtx x, int code)
15862 {
15863 char buf[256];
15864
15865 switch (GET_CODE (x))
15866 {
15867 case PC:
15868 gcc_assert (flag_pic);
15869 putc ('.', file);
15870 break;
15871
15872 case SYMBOL_REF:
15873 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
15874 output_addr_const (file, x);
15875 else
15876 {
15877 const char *name = XSTR (x, 0);
15878
15879 /* Mark the decl as referenced so that cgraph will
15880 output the function. */
15881 if (SYMBOL_REF_DECL (x))
15882 mark_decl_referenced (SYMBOL_REF_DECL (x));
15883
15884 #if TARGET_MACHO
15885 if (MACHOPIC_INDIRECT
15886 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
15887 name = machopic_indirection_name (x, /*stub_p=*/true);
15888 #endif
15889 assemble_name (file, name);
15890 }
15891 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
15892 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
15893 fputs ("@PLT", file);
15894 break;
15895
15896 case LABEL_REF:
15897 x = XEXP (x, 0);
15898 /* FALLTHRU */
15899 case CODE_LABEL:
15900 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
15901 assemble_name (asm_out_file, buf);
15902 break;
15903
15904 case CONST_INT:
15905 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15906 break;
15907
15908 case CONST:
15909 /* This used to output parentheses around the expression,
15910 but that does not work on the 386 (either ATT or BSD assembler). */
15911 output_pic_addr_const (file, XEXP (x, 0), code);
15912 break;
15913
15914 case CONST_DOUBLE:
15915 /* We can't handle floating point constants;
15916 TARGET_PRINT_OPERAND must handle them. */
15917 output_operand_lossage ("floating constant misused");
15918 break;
15919
15920 case PLUS:
15921 /* Some assemblers need integer constants to appear first. */
15922 if (CONST_INT_P (XEXP (x, 0)))
15923 {
15924 output_pic_addr_const (file, XEXP (x, 0), code);
15925 putc ('+', file);
15926 output_pic_addr_const (file, XEXP (x, 1), code);
15927 }
15928 else
15929 {
15930 gcc_assert (CONST_INT_P (XEXP (x, 1)));
15931 output_pic_addr_const (file, XEXP (x, 1), code);
15932 putc ('+', file);
15933 output_pic_addr_const (file, XEXP (x, 0), code);
15934 }
15935 break;
15936
15937 case MINUS:
15938 if (!TARGET_MACHO)
15939 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
15940 output_pic_addr_const (file, XEXP (x, 0), code);
15941 putc ('-', file);
15942 output_pic_addr_const (file, XEXP (x, 1), code);
15943 if (!TARGET_MACHO)
15944 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
15945 break;
15946
15947 case UNSPEC:
15948 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
15949 {
15950 bool f = i386_asm_output_addr_const_extra (file, x);
15951 gcc_assert (f);
15952 break;
15953 }
15954
15955 gcc_assert (XVECLEN (x, 0) == 1);
15956 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
15957 switch (XINT (x, 1))
15958 {
15959 case UNSPEC_GOT:
15960 fputs ("@GOT", file);
15961 break;
15962 case UNSPEC_GOTOFF:
15963 fputs ("@GOTOFF", file);
15964 break;
15965 case UNSPEC_PLTOFF:
15966 fputs ("@PLTOFF", file);
15967 break;
15968 case UNSPEC_PCREL:
15969 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15970 "(%rip)" : "[rip]", file);
15971 break;
15972 case UNSPEC_GOTPCREL:
15973 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15974 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
15975 break;
15976 case UNSPEC_GOTTPOFF:
15977 /* FIXME: This might be @TPOFF in Sun ld too. */
15978 fputs ("@gottpoff", file);
15979 break;
15980 case UNSPEC_TPOFF:
15981 fputs ("@tpoff", file);
15982 break;
15983 case UNSPEC_NTPOFF:
15984 if (TARGET_64BIT)
15985 fputs ("@tpoff", file);
15986 else
15987 fputs ("@ntpoff", file);
15988 break;
15989 case UNSPEC_DTPOFF:
15990 fputs ("@dtpoff", file);
15991 break;
15992 case UNSPEC_GOTNTPOFF:
15993 if (TARGET_64BIT)
15994 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15995 "@gottpoff(%rip)": "@gottpoff[rip]", file);
15996 else
15997 fputs ("@gotntpoff", file);
15998 break;
15999 case UNSPEC_INDNTPOFF:
16000 fputs ("@indntpoff", file);
16001 break;
16002 #if TARGET_MACHO
16003 case UNSPEC_MACHOPIC_OFFSET:
16004 putc ('-', file);
16005 machopic_output_function_base_name (file);
16006 break;
16007 #endif
16008 default:
16009 output_operand_lossage ("invalid UNSPEC as operand");
16010 break;
16011 }
16012 break;
16013
16014 default:
16015 output_operand_lossage ("invalid expression as operand");
16016 }
16017 }
16018
16019 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16020 We need to emit DTP-relative relocations. */
16021
16022 static void ATTRIBUTE_UNUSED
16023 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16024 {
16025 fputs (ASM_LONG, file);
16026 output_addr_const (file, x);
16027 fputs ("@dtpoff", file);
16028 switch (size)
16029 {
16030 case 4:
16031 break;
16032 case 8:
16033 fputs (", 0", file);
16034 break;
16035 default:
16036 gcc_unreachable ();
16037 }
16038 }
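/* So for "__thread int x;" the directive emitted is, e.g.,
     .long x@dtpoff         for SIZE == 4, or
     .long x@dtpoff, 0      for SIZE == 8 (the high half is zero).  */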
16039
16040 /* Return true if X is a representation of the PIC register. This copes
16041 with calls from ix86_find_base_term, where the register might have
16042 been replaced by a cselib value. */
16043
16044 static bool
16045 ix86_pic_register_p (rtx x)
16046 {
16047 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16048 return (pic_offset_table_rtx
16049 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16050 else if (!REG_P (x))
16051 return false;
16052 else if (pic_offset_table_rtx)
16053 {
16054 if (REGNO (x) == REGNO (pic_offset_table_rtx))
16055 return true;
16056 if (HARD_REGISTER_P (x)
16057 && !HARD_REGISTER_P (pic_offset_table_rtx)
16058 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
16059 return true;
16060 return false;
16061 }
16062 else
16063 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16064 }
16065
16066 /* Helper function for ix86_delegitimize_address.
16067 Attempt to delegitimize TLS local-exec accesses. */
16068
16069 static rtx
16070 ix86_delegitimize_tls_address (rtx orig_x)
16071 {
16072 rtx x = orig_x, unspec;
16073 struct ix86_address addr;
16074
16075 if (!TARGET_TLS_DIRECT_SEG_REFS)
16076 return orig_x;
16077 if (MEM_P (x))
16078 x = XEXP (x, 0);
16079 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
16080 return orig_x;
16081 if (ix86_decompose_address (x, &addr) == 0
16082 || addr.seg != DEFAULT_TLS_SEG_REG
16083 || addr.disp == NULL_RTX
16084 || GET_CODE (addr.disp) != CONST)
16085 return orig_x;
16086 unspec = XEXP (addr.disp, 0);
16087 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16088 unspec = XEXP (unspec, 0);
16089 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16090 return orig_x;
16091 x = XVECEXP (unspec, 0, 0);
16092 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16093 if (unspec != XEXP (addr.disp, 0))
16094 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16095 if (addr.index)
16096 {
16097 rtx idx = addr.index;
16098 if (addr.scale != 1)
16099 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16100 x = gen_rtx_PLUS (Pmode, idx, x);
16101 }
16102 if (addr.base)
16103 x = gen_rtx_PLUS (Pmode, addr.base, x);
16104 if (MEM_P (orig_x))
16105 x = replace_equiv_address_nv (orig_x, x);
16106 return x;
16107 }
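/* E.g. an %fs:/%gs:-relative local-exec access whose displacement is
   (const (unspec [x] UNSPEC_NTPOFF)) is folded back into a plain
   reference to "x" (plus any base/index terms) for debug output.  */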
16108
16109 /* In the name of slightly smaller debug output, and to cater to
16110 general assembler lossage, recognize PIC+GOTOFF and turn it back
16111 into a direct symbol reference.
16112
16113 On Darwin, this is necessary to avoid a crash, because Darwin
16114 has a different PIC label for each routine but the DWARF debugging
16115 information is not associated with any particular routine, so it's
16116 necessary to remove references to the PIC label from RTL stored by
16117 the DWARF output code. */
16118
16119 static rtx
16120 ix86_delegitimize_address (rtx x)
16121 {
16122 rtx orig_x = delegitimize_mem_from_attrs (x);
16123 /* addend is NULL or some rtx if x is something+GOTOFF where
16124 something doesn't include the PIC register. */
16125 rtx addend = NULL_RTX;
16126 /* reg_addend is NULL or a multiple of some register. */
16127 rtx reg_addend = NULL_RTX;
16128 /* const_addend is NULL or a const_int. */
16129 rtx const_addend = NULL_RTX;
16130 /* This is the result, or NULL. */
16131 rtx result = NULL_RTX;
16132
16133 x = orig_x;
16134
16135 if (MEM_P (x))
16136 x = XEXP (x, 0);
16137
16138 if (TARGET_64BIT)
16139 {
16140 if (GET_CODE (x) == CONST
16141 && GET_CODE (XEXP (x, 0)) == PLUS
16142 && GET_MODE (XEXP (x, 0)) == Pmode
16143 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16144 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16145 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16146 {
16147 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16148 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16149 if (MEM_P (orig_x))
16150 x = replace_equiv_address_nv (orig_x, x);
16151 return x;
16152 }
16153
16154 if (GET_CODE (x) == CONST
16155 && GET_CODE (XEXP (x, 0)) == UNSPEC
16156 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16157 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16158 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16159 {
16160 x = XVECEXP (XEXP (x, 0), 0, 0);
16161 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16162 {
16163 x = simplify_gen_subreg (GET_MODE (orig_x), x,
16164 GET_MODE (x), 0);
16165 if (x == NULL_RTX)
16166 return orig_x;
16167 }
16168 return x;
16169 }
16170
16171 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16172 return ix86_delegitimize_tls_address (orig_x);
16173
16174 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16175 and -mcmodel=medium -fpic. */
16176 }
16177
16178 if (GET_CODE (x) != PLUS
16179 || GET_CODE (XEXP (x, 1)) != CONST)
16180 return ix86_delegitimize_tls_address (orig_x);
16181
16182 if (ix86_pic_register_p (XEXP (x, 0)))
16183 /* %ebx + GOT/GOTOFF */
16184 ;
16185 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16186 {
16187 /* %ebx + %reg * scale + GOT/GOTOFF */
16188 reg_addend = XEXP (x, 0);
16189 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16190 reg_addend = XEXP (reg_addend, 1);
16191 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16192 reg_addend = XEXP (reg_addend, 0);
16193 else
16194 {
16195 reg_addend = NULL_RTX;
16196 addend = XEXP (x, 0);
16197 }
16198 }
16199 else
16200 addend = XEXP (x, 0);
16201
16202 x = XEXP (XEXP (x, 1), 0);
16203 if (GET_CODE (x) == PLUS
16204 && CONST_INT_P (XEXP (x, 1)))
16205 {
16206 const_addend = XEXP (x, 1);
16207 x = XEXP (x, 0);
16208 }
16209
16210 if (GET_CODE (x) == UNSPEC
16211 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16212 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16213 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16214 && !MEM_P (orig_x) && !addend)))
16215 result = XVECEXP (x, 0, 0);
16216
16217 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16218 && !MEM_P (orig_x))
16219 result = XVECEXP (x, 0, 0);
16220
16221 if (! result)
16222 return ix86_delegitimize_tls_address (orig_x);
16223
16224 if (const_addend)
16225 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16226 if (reg_addend)
16227 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16228 if (addend)
16229 {
16230 /* If the rest of original X doesn't involve the PIC register, add
16231 addend and subtract pic_offset_table_rtx. This can happen e.g.
16232 for code like:
16233 leal (%ebx, %ecx, 4), %ecx
16234 ...
16235 movl foo@GOTOFF(%ecx), %edx
16236 in which case we return (%ecx - %ebx) + foo
16237 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16238 and reload has completed. */
16239 if (pic_offset_table_rtx
16240 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16241 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16242 pic_offset_table_rtx),
16243 result);
16244 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
16245 {
16246 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16247 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16248 result = gen_rtx_PLUS (Pmode, tmp, result);
16249 }
16250 else
16251 return orig_x;
16252 }
16253 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16254 {
16255 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
16256 if (result == NULL_RTX)
16257 return orig_x;
16258 }
16259 return result;
16260 }
16261
16262 /* If X is a machine specific address (i.e. a symbol or label being
16263 referenced as a displacement from the GOT implemented using an
16264 UNSPEC), then return the base term. Otherwise return X. */
16265
16266 rtx
16267 ix86_find_base_term (rtx x)
16268 {
16269 rtx term;
16270
16271 if (TARGET_64BIT)
16272 {
16273 if (GET_CODE (x) != CONST)
16274 return x;
16275 term = XEXP (x, 0);
16276 if (GET_CODE (term) == PLUS
16277 && CONST_INT_P (XEXP (term, 1)))
16278 term = XEXP (term, 0);
16279 if (GET_CODE (term) != UNSPEC
16280 || (XINT (term, 1) != UNSPEC_GOTPCREL
16281 && XINT (term, 1) != UNSPEC_PCREL))
16282 return x;
16283
16284 return XVECEXP (term, 0, 0);
16285 }
16286
16287 return ix86_delegitimize_address (x);
16288 }
16289 \f
16290 static void
16291 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16292 bool fp, FILE *file)
16293 {
16294 const char *suffix;
16295
16296 if (mode == CCFPmode || mode == CCFPUmode)
16297 {
16298 code = ix86_fp_compare_code_to_integer (code);
16299 mode = CCmode;
16300 }
16301 if (reverse)
16302 code = reverse_condition (code);
16303
16304 switch (code)
16305 {
16306 case EQ:
16307 switch (mode)
16308 {
16309 case CCAmode:
16310 suffix = "a";
16311 break;
16312 case CCCmode:
16313 suffix = "c";
16314 break;
16315 case CCOmode:
16316 suffix = "o";
16317 break;
16318 case CCPmode:
16319 suffix = "p";
16320 break;
16321 case CCSmode:
16322 suffix = "s";
16323 break;
16324 default:
16325 suffix = "e";
16326 break;
16327 }
16328 break;
16329 case NE:
16330 switch (mode)
16331 {
16332 case CCAmode:
16333 suffix = "na";
16334 break;
16335 case CCCmode:
16336 suffix = "nc";
16337 break;
16338 case CCOmode:
16339 suffix = "no";
16340 break;
16341 case CCPmode:
16342 suffix = "np";
16343 break;
16344 case CCSmode:
16345 suffix = "ns";
16346 break;
16347 default:
16348 suffix = "ne";
16349 break;
16350 }
16351 break;
16352 case GT:
16353 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16354 suffix = "g";
16355 break;
16356 case GTU:
16357 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
16358 Those same assemblers have the same but opposite lossage on cmov. */
16359 if (mode == CCmode)
16360 suffix = fp ? "nbe" : "a";
16361 else
16362 gcc_unreachable ();
16363 break;
16364 case LT:
16365 switch (mode)
16366 {
16367 case CCNOmode:
16368 case CCGOCmode:
16369 suffix = "s";
16370 break;
16371
16372 case CCmode:
16373 case CCGCmode:
16374 suffix = "l";
16375 break;
16376
16377 default:
16378 gcc_unreachable ();
16379 }
16380 break;
16381 case LTU:
16382 if (mode == CCmode)
16383 suffix = "b";
16384 else if (mode == CCCmode)
16385 suffix = fp ? "b" : "c";
16386 else
16387 gcc_unreachable ();
16388 break;
16389 case GE:
16390 switch (mode)
16391 {
16392 case CCNOmode:
16393 case CCGOCmode:
16394 suffix = "ns";
16395 break;
16396
16397 case CCmode:
16398 case CCGCmode:
16399 suffix = "ge";
16400 break;
16401
16402 default:
16403 gcc_unreachable ();
16404 }
16405 break;
16406 case GEU:
16407 if (mode == CCmode)
16408 suffix = "nb";
16409 else if (mode == CCCmode)
16410 suffix = fp ? "nb" : "nc";
16411 else
16412 gcc_unreachable ();
16413 break;
16414 case LE:
16415 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
16416 suffix = "le";
16417 break;
16418 case LEU:
16419 if (mode == CCmode)
16420 suffix = "be";
16421 else
16422 gcc_unreachable ();
16423 break;
16424 case UNORDERED:
16425 suffix = fp ? "u" : "p";
16426 break;
16427 case ORDERED:
16428 suffix = fp ? "nu" : "np";
16429 break;
16430 default:
16431 gcc_unreachable ();
16432 }
16433 fputs (suffix, file);
16434 }
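/* For example, (gt ...) compared in CCGCmode prints "g", so a template
   using %C on such an operand emits "setg"/"cmovg"/"jg"; with REVERSE
   set, the suffix printed would be "le" instead.  */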
16435
16436 /* Print the name of register X to FILE based on its machine mode and number.
16437 If CODE is 'w', pretend the mode is HImode.
16438 If CODE is 'b', pretend the mode is QImode.
16439 If CODE is 'k', pretend the mode is SImode.
16440 If CODE is 'q', pretend the mode is DImode.
16441 If CODE is 'x', pretend the mode is V4SFmode.
16442 If CODE is 't', pretend the mode is V8SFmode.
16443 If CODE is 'g', pretend the mode is V16SFmode.
16444 If CODE is 'h', pretend the reg is the 'high' byte register.
16445 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
16446 If CODE is 'd', duplicate the operand for an AVX instruction.
16447 */
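/* For example, for the ax register: code 'q' prints "rax" (in 64-bit
   mode), 'k' prints "eax", 'w' prints "ax", 'b' prints "al" and 'h'
   prints "ah"; for xmm3, code 't' prints "ymm3" and 'g' prints "zmm3".  */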
16448
16449 void
16450 print_reg (rtx x, int code, FILE *file)
16451 {
16452 const char *reg;
16453 int msize;
16454 unsigned int regno;
16455 bool duplicated;
16456
16457 if (ASSEMBLER_DIALECT == ASM_ATT)
16458 putc ('%', file);
16459
16460 if (x == pc_rtx)
16461 {
16462 gcc_assert (TARGET_64BIT);
16463 fputs ("rip", file);
16464 return;
16465 }
16466
16467 if (code == 'y' && STACK_TOP_P (x))
16468 {
16469 fputs ("st(0)", file);
16470 return;
16471 }
16472
16473 if (code == 'w')
16474 msize = 2;
16475 else if (code == 'b')
16476 msize = 1;
16477 else if (code == 'k')
16478 msize = 4;
16479 else if (code == 'q')
16480 msize = 8;
16481 else if (code == 'h')
16482 msize = 0;
16483 else if (code == 'x')
16484 msize = 16;
16485 else if (code == 't')
16486 msize = 32;
16487 else if (code == 'g')
16488 msize = 64;
16489 else
16490 msize = GET_MODE_SIZE (GET_MODE (x));
16491
16492 regno = true_regnum (x);
16493
16494 gcc_assert (regno != ARG_POINTER_REGNUM
16495 && regno != FRAME_POINTER_REGNUM
16496 && regno != FLAGS_REG
16497 && regno != FPSR_REG
16498 && regno != FPCR_REG);
16499
16500 duplicated = code == 'd' && TARGET_AVX;
16501
16502 switch (msize)
16503 {
16504 case 8:
16505 case 4:
16506 if (LEGACY_INT_REGNO_P (regno))
16507 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
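/* FALLTHRU */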
16508 case 16:
16509 case 12:
16510 case 2:
16511 normal:
16512 reg = hi_reg_name[regno];
16513 break;
16514 case 1:
16515 if (regno >= ARRAY_SIZE (qi_reg_name))
16516 goto normal;
16517 reg = qi_reg_name[regno];
16518 break;
16519 case 0:
16520 if (regno >= ARRAY_SIZE (qi_high_reg_name))
16521 goto normal;
16522 reg = qi_high_reg_name[regno];
16523 break;
16524 case 32:
16525 case 64:
16526 if (SSE_REGNO_P (regno))
16527 {
16528 gcc_assert (!duplicated);
16529 putc (msize == 32 ? 'y' : 'z', file);
16530 reg = hi_reg_name[regno] + 1;
16531 break;
16532 }
16533 goto normal;
16534 default:
16535 gcc_unreachable ();
16536 }
16537
16538 fputs (reg, file);
16539
16540 /* Irritatingly, AMD extended registers use a
16541 different naming convention: "r%d[bwd]". */
16542 if (REX_INT_REGNO_P (regno))
16543 {
16544 gcc_assert (TARGET_64BIT);
16545 switch (msize)
16546 {
16547 case 0:
16548 error ("extended registers have no high halves");
16549 break;
16550 case 1:
16551 putc ('b', file);
16552 break;
16553 case 2:
16554 putc ('w', file);
16555 break;
16556 case 4:
16557 putc ('d', file);
16558 break;
16559 case 8:
16560 /* no suffix */
16561 break;
16562 default:
16563 error ("unsupported operand size for extended register");
16564 break;
16565 }
16566 return;
16567 }
16568
16569 if (duplicated)
16570 {
16571 if (ASSEMBLER_DIALECT == ASM_ATT)
16572 fprintf (file, ", %%%s", reg);
16573 else
16574 fprintf (file, ", %s", reg);
16575 }
16576 }
16577
16578 /* Meaning of CODE:
16579 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
16580 C -- print opcode suffix for set/cmov insn.
16581 c -- like C, but print reversed condition
16582 F,f -- likewise, but for floating-point.
16583 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
16584 otherwise nothing
16585 R -- print embedded rounding and sae.
16586 r -- print only sae.
16587 z -- print the opcode suffix for the size of the current operand.
16588 Z -- likewise, with special suffixes for x87 instructions.
16589 * -- print a star (in certain assembler syntax)
16590 A -- print an absolute memory reference.
16591 E -- print address with DImode register names if TARGET_64BIT.
16592 w -- print the operand as if it's a "word" (HImode) even if it isn't.
16593 s -- print a shift double count, followed by the assembler's argument
16594 delimiter.
16595 b -- print the QImode name of the register for the indicated operand.
16596 %b0 would print %al if operands[0] is reg 0.
16597 w -- likewise, print the HImode name of the register.
16598 k -- likewise, print the SImode name of the register.
16599 q -- likewise, print the DImode name of the register.
16600 x -- likewise, print the V4SFmode name of the register.
16601 t -- likewise, print the V8SFmode name of the register.
16602 g -- likewise, print the V16SFmode name of the register.
16603 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
16604 y -- print "st(0)" instead of "st" as a register.
16605 d -- print duplicated register operand for AVX instruction.
16606 D -- print condition for SSE cmp instruction.
16607 P -- if PIC, print an @PLT suffix.
16608 p -- print raw symbol name.
16609 X -- don't print any sort of PIC '@' suffix for a symbol.
16610 & -- print some in-use local-dynamic symbol name.
16611 H -- print a memory address offset by 8; used for sse high-parts
16612 Y -- print condition for XOP pcom* instruction.
16613 + -- print a branch hint as 'cs' or 'ds' prefix
16614 ; -- print a semicolon (after prefixes due to a bug in older gas).
16615 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
16616 @ -- print the segment register of a thread base pointer load.
16617 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
16618 ! -- print MPX prefix for jxx/call/ret instructions if required.
16619 */
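/* As a concrete (hypothetical) example, an output template such as
     "mov%z0\t{%1, %0|%0, %1}"
   would emit "movb"/"movw"/"movl"/"movq" depending on the mode of
   operand 0, while "%k1" forces the SImode name of the register that
   is operand 1.  */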
16620
16621 void
16622 ix86_print_operand (FILE *file, rtx x, int code)
16623 {
16624 if (code)
16625 {
16626 switch (code)
16627 {
16628 case 'A':
16629 switch (ASSEMBLER_DIALECT)
16630 {
16631 case ASM_ATT:
16632 putc ('*', file);
16633 break;
16634
16635 case ASM_INTEL:
16636 /* Intel syntax. For absolute addresses, registers should not
16637 be surrounded by brackets. */
16638 if (!REG_P (x))
16639 {
16640 putc ('[', file);
16641 ix86_print_operand (file, x, 0);
16642 putc (']', file);
16643 return;
16644 }
16645 break;
16646
16647 default:
16648 gcc_unreachable ();
16649 }
16650
16651 ix86_print_operand (file, x, 0);
16652 return;
16653
16654 case 'E':
16655 /* Wrap address in an UNSPEC to declare special handling. */
16656 if (TARGET_64BIT)
16657 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
16658
16659 output_address (VOIDmode, x);
16660 return;
16661
16662 case 'L':
16663 if (ASSEMBLER_DIALECT == ASM_ATT)
16664 putc ('l', file);
16665 return;
16666
16667 case 'W':
16668 if (ASSEMBLER_DIALECT == ASM_ATT)
16669 putc ('w', file);
16670 return;
16671
16672 case 'B':
16673 if (ASSEMBLER_DIALECT == ASM_ATT)
16674 putc ('b', file);
16675 return;
16676
16677 case 'Q':
16678 if (ASSEMBLER_DIALECT == ASM_ATT)
16679 putc ('l', file);
16680 return;
16681
16682 case 'S':
16683 if (ASSEMBLER_DIALECT == ASM_ATT)
16684 putc ('s', file);
16685 return;
16686
16687 case 'T':
16688 if (ASSEMBLER_DIALECT == ASM_ATT)
16689 putc ('t', file);
16690 return;
16691
16692 case 'O':
16693 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16694 if (ASSEMBLER_DIALECT != ASM_ATT)
16695 return;
16696
16697 switch (GET_MODE_SIZE (GET_MODE (x)))
16698 {
16699 case 2:
16700 putc ('w', file);
16701 break;
16702
16703 case 4:
16704 putc ('l', file);
16705 break;
16706
16707 case 8:
16708 putc ('q', file);
16709 break;
16710
16711 default:
16712 output_operand_lossage
16713 ("invalid operand size for operand code 'O'");
16714 return;
16715 }
16716
16717 putc ('.', file);
16718 #endif
16719 return;
16720
16721 case 'z':
16722 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16723 {
16724 /* Opcodes don't get size suffixes when using Intel syntax. */
16725 if (ASSEMBLER_DIALECT == ASM_INTEL)
16726 return;
16727
16728 switch (GET_MODE_SIZE (GET_MODE (x)))
16729 {
16730 case 1:
16731 putc ('b', file);
16732 return;
16733
16734 case 2:
16735 putc ('w', file);
16736 return;
16737
16738 case 4:
16739 putc ('l', file);
16740 return;
16741
16742 case 8:
16743 putc ('q', file);
16744 return;
16745
16746 default:
16747 output_operand_lossage
16748 ("invalid operand size for operand code 'z'");
16749 return;
16750 }
16751 }
16752
16753 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16754 warning
16755 (0, "non-integer operand used with operand code 'z'");
16756 /* FALLTHRU */
16757
16758 case 'Z':
16759 /* 387 opcodes don't get size suffixes when using Intel syntax. */
16760 if (ASSEMBLER_DIALECT == ASM_INTEL)
16761 return;
16762
16763 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16764 {
16765 switch (GET_MODE_SIZE (GET_MODE (x)))
16766 {
16767 case 2:
16768 #ifdef HAVE_AS_IX86_FILDS
16769 putc ('s', file);
16770 #endif
16771 return;
16772
16773 case 4:
16774 putc ('l', file);
16775 return;
16776
16777 case 8:
16778 #ifdef HAVE_AS_IX86_FILDQ
16779 putc ('q', file);
16780 #else
16781 fputs ("ll", file);
16782 #endif
16783 return;
16784
16785 default:
16786 break;
16787 }
16788 }
16789 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16790 {
16791 /* 387 opcodes don't get size suffixes
16792 if the operands are registers. */
16793 if (STACK_REG_P (x))
16794 return;
16795
16796 switch (GET_MODE_SIZE (GET_MODE (x)))
16797 {
16798 case 4:
16799 putc ('s', file);
16800 return;
16801
16802 case 8:
16803 putc ('l', file);
16804 return;
16805
16806 case 12:
16807 case 16:
16808 putc ('t', file);
16809 return;
16810
16811 default:
16812 break;
16813 }
16814 }
16815 else
16816 {
16817 output_operand_lossage
16818 ("invalid operand type used with operand code 'Z'");
16819 return;
16820 }
16821
16822 output_operand_lossage
16823 ("invalid operand size for operand code 'Z'");
16824 return;
16825
16826 case 'd':
16827 case 'b':
16828 case 'w':
16829 case 'k':
16830 case 'q':
16831 case 'h':
16832 case 't':
16833 case 'g':
16834 case 'y':
16835 case 'x':
16836 case 'X':
16837 case 'P':
16838 case 'p':
16839 break;
16840
16841 case 's':
16842 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
16843 {
16844 ix86_print_operand (file, x, 0);
16845 fputs (", ", file);
16846 }
16847 return;
16848
16849 case 'Y':
16850 switch (GET_CODE (x))
16851 {
16852 case NE:
16853 fputs ("neq", file);
16854 break;
16855 case EQ:
16856 fputs ("eq", file);
16857 break;
16858 case GE:
16859 case GEU:
16860 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
16861 break;
16862 case GT:
16863 case GTU:
16864 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
16865 break;
16866 case LE:
16867 case LEU:
16868 fputs ("le", file);
16869 break;
16870 case LT:
16871 case LTU:
16872 fputs ("lt", file);
16873 break;
16874 case UNORDERED:
16875 fputs ("unord", file);
16876 break;
16877 case ORDERED:
16878 fputs ("ord", file);
16879 break;
16880 case UNEQ:
16881 fputs ("ueq", file);
16882 break;
16883 case UNGE:
16884 fputs ("nlt", file);
16885 break;
16886 case UNGT:
16887 fputs ("nle", file);
16888 break;
16889 case UNLE:
16890 fputs ("ule", file);
16891 break;
16892 case UNLT:
16893 fputs ("ult", file);
16894 break;
16895 case LTGT:
16896 fputs ("une", file);
16897 break;
16898 default:
16899 output_operand_lossage ("operand is not a condition code, "
16900 "invalid operand code 'Y'");
16901 return;
16902 }
16903 return;
16904
16905 case 'D':
16906 /* A little bit of braindamage here. The SSE compare instructions
16907 use completely different names for the comparisons than the
16908 fp conditional moves do. */
16909 switch (GET_CODE (x))
16910 {
16911 case UNEQ:
16912 if (TARGET_AVX)
16913 {
16914 fputs ("eq_us", file);
16915 break;
16916 }
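/* FALLTHRU */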
16917 case EQ:
16918 fputs ("eq", file);
16919 break;
16920 case UNLT:
16921 if (TARGET_AVX)
16922 {
16923 fputs ("nge", file);
16924 break;
16925 }
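/* FALLTHRU */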
16926 case LT:
16927 fputs ("lt", file);
16928 break;
16929 case UNLE:
16930 if (TARGET_AVX)
16931 {
16932 fputs ("ngt", file);
16933 break;
16934 }
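/* FALLTHRU */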
16935 case LE:
16936 fputs ("le", file);
16937 break;
16938 case UNORDERED:
16939 fputs ("unord", file);
16940 break;
16941 case LTGT:
16942 if (TARGET_AVX)
16943 {
16944 fputs ("neq_oq", file);
16945 break;
16946 }
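/* FALLTHRU */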
16947 case NE:
16948 fputs ("neq", file);
16949 break;
16950 case GE:
16951 if (TARGET_AVX)
16952 {
16953 fputs ("ge", file);
16954 break;
16955 }
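/* FALLTHRU */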
16956 case UNGE:
16957 fputs ("nlt", file);
16958 break;
16959 case GT:
16960 if (TARGET_AVX)
16961 {
16962 fputs ("gt", file);
16963 break;
16964 }
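/* FALLTHRU */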
16965 case UNGT:
16966 fputs ("nle", file);
16967 break;
16968 case ORDERED:
16969 fputs ("ord", file);
16970 break;
16971 default:
16972 output_operand_lossage ("operand is not a condition code, "
16973 "invalid operand code 'D'");
16974 return;
16975 }
16976 return;
16977
16978 case 'F':
16979 case 'f':
16980 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16981 if (ASSEMBLER_DIALECT == ASM_ATT)
16982 putc ('.', file);
16983 #endif
16984
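/* FALLTHRU */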
16985 case 'C':
16986 case 'c':
16987 if (!COMPARISON_P (x))
16988 {
16989 output_operand_lossage ("operand is not a condition code, "
16990 "invalid operand code '%c'", code);
16991 return;
16992 }
16993 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
16994 code == 'c' || code == 'f',
16995 code == 'F' || code == 'f',
16996 file);
16997 return;
16998
16999 case 'H':
17000 if (!offsettable_memref_p (x))
17001 {
17002 output_operand_lossage ("operand is not an offsettable memory "
17003 "reference, invalid operand code 'H'");
17004 return;
17005 }
17006 /* It doesn't actually matter what mode we use here, as we're
17007 only going to use this for printing. */
17008 x = adjust_address_nv (x, DImode, 8);
17009 /* Output 'qword ptr' for intel assembler dialect. */
17010 if (ASSEMBLER_DIALECT == ASM_INTEL)
17011 code = 'q';
17012 break;
17013
17014 case 'K':
17015 gcc_assert (CONST_INT_P (x));
17016
17017 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17018 #ifdef HAVE_AS_IX86_HLE
17019 fputs ("xacquire ", file);
17020 #else
17021 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17022 #endif
17023 else if (INTVAL (x) & IX86_HLE_RELEASE)
17024 #ifdef HAVE_AS_IX86_HLE
17025 fputs ("xrelease ", file);
17026 #else
17027 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17028 #endif
17029 /* We do not want to print the value of the operand. */
17030 return;
17031
17032 case 'N':
17033 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17034 fputs ("{z}", file);
17035 return;
17036
17037 case 'r':
17038 gcc_assert (CONST_INT_P (x));
17039 gcc_assert (INTVAL (x) == ROUND_SAE);
17040
17041 if (ASSEMBLER_DIALECT == ASM_INTEL)
17042 fputs (", ", file);
17043
17044 fputs ("{sae}", file);
17045
17046 if (ASSEMBLER_DIALECT == ASM_ATT)
17047 fputs (", ", file);
17048
17049 return;
17050
17051 case 'R':
17052 gcc_assert (CONST_INT_P (x));
17053
17054 if (ASSEMBLER_DIALECT == ASM_INTEL)
17055 fputs (", ", file);
17056
17057 switch (INTVAL (x))
17058 {
17059 case ROUND_NEAREST_INT | ROUND_SAE:
17060 fputs ("{rn-sae}", file);
17061 break;
17062 case ROUND_NEG_INF | ROUND_SAE:
17063 fputs ("{rd-sae}", file);
17064 break;
17065 case ROUND_POS_INF | ROUND_SAE:
17066 fputs ("{ru-sae}", file);
17067 break;
17068 case ROUND_ZERO | ROUND_SAE:
17069 fputs ("{rz-sae}", file);
17070 break;
17071 default:
17072 gcc_unreachable ();
17073 }
17074
17075 if (ASSEMBLER_DIALECT == ASM_ATT)
17076 fputs (", ", file);
17077
17078 return;
17079
17080 case '*':
17081 if (ASSEMBLER_DIALECT == ASM_ATT)
17082 putc ('*', file);
17083 return;
17084
17085 case '&':
17086 {
17087 const char *name = get_some_local_dynamic_name ();
17088 if (name == NULL)
17089 output_operand_lossage ("'%%&' used without any "
17090 "local dynamic TLS references");
17091 else
17092 assemble_name (file, name);
17093 return;
17094 }
17095
17096 case '+':
17097 {
17098 rtx x;
17099
17100 if (!optimize
17101 || optimize_function_for_size_p (cfun)
17102 || !TARGET_BRANCH_PREDICTION_HINTS)
17103 return;
17104
17105 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17106 if (x)
17107 {
17108 int pred_val = XINT (x, 0);
17109
17110 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17111 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17112 {
17113 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17114 bool cputaken
17115 = final_forward_branch_p (current_output_insn) == 0;
17116
17117 /* Emit hints only when the default branch prediction
17118 heuristics would fail. */
17119 if (taken != cputaken)
17120 {
17121 /* We use the 3e (DS) prefix for taken branches and
17122 the 2e (CS) prefix for not-taken branches. */
17123 if (taken)
17124 fputs ("ds ; ", file);
17125 else
17126 fputs ("cs ; ", file);
17127 }
17128 }
17129 }
17130 return;
17131 }
17132
17133 case ';':
17134 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17135 putc (';', file);
17136 #endif
17137 return;
17138
17139 case '@':
17140 if (ASSEMBLER_DIALECT == ASM_ATT)
17141 putc ('%', file);
17142
17143 /* The kernel uses a different segment register for performance
17144 reasons; a system call would not have to trash the userspace
17145 segment register, which would be expensive. */
17146 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
17147 fputs ("fs", file);
17148 else
17149 fputs ("gs", file);
17150 return;
17151
17152 case '~':
17153 putc (TARGET_AVX2 ? 'i' : 'f', file);
17154 return;
17155
17156 case '^':
17157 if (TARGET_64BIT && Pmode != word_mode)
17158 fputs ("addr32 ", file);
17159 return;
17160
17161 case '!':
17162 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17163 fputs ("bnd ", file);
17164 return;
17165
17166 default:
17167 output_operand_lossage ("invalid operand code '%c'", code);
17168 }
17169 }
17170
17171 if (REG_P (x))
17172 print_reg (x, code, file);
17173
17174 else if (MEM_P (x))
17175 {
17176 rtx addr = XEXP (x, 0);
17177
17178 /* No `byte ptr' prefix for call instructions ... */
17179 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17180 {
17181 machine_mode mode = GET_MODE (x);
17182 const char *size;
17183
17184 /* Check for explicit size override codes. */
17185 if (code == 'b')
17186 size = "BYTE";
17187 else if (code == 'w')
17188 size = "WORD";
17189 else if (code == 'k')
17190 size = "DWORD";
17191 else if (code == 'q')
17192 size = "QWORD";
17193 else if (code == 'x')
17194 size = "XMMWORD";
17195 else if (mode == BLKmode)
17196 /* ... or BLKmode operands, when not overridden. */
17197 size = NULL;
17198 else
17199 switch (GET_MODE_SIZE (mode))
17200 {
17201 case 1: size = "BYTE"; break;
17202 case 2: size = "WORD"; break;
17203 case 4: size = "DWORD"; break;
17204 case 8: size = "QWORD"; break;
17205 case 12: size = "TBYTE"; break;
17206 case 16:
17207 if (mode == XFmode)
17208 size = "TBYTE";
17209 else
17210 size = "XMMWORD";
17211 break;
17212 case 32: size = "YMMWORD"; break;
17213 case 64: size = "ZMMWORD"; break;
17214 default:
17215 gcc_unreachable ();
17216 }
17217 if (size)
17218 {
17219 fputs (size, file);
17220 fputs (" PTR ", file);
17221 }
17222 }
17223
17224 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17225 output_operand_lossage ("invalid constraints for operand");
17226 else
17227 ix86_print_operand_address_as
17228 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
17229 }
17230
17231 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17232 {
17233 long l;
17234
17235 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17236
17237 if (ASSEMBLER_DIALECT == ASM_ATT)
17238 putc ('$', file);
17239 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
17240 if (code == 'q')
17241 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17242 (unsigned long long) (int) l);
17243 else
17244 fprintf (file, "0x%08x", (unsigned int) l);
17245 }
17246
17247 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17248 {
17249 long l[2];
17250
17251 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17252
17253 if (ASSEMBLER_DIALECT == ASM_ATT)
17254 putc ('$', file);
17255 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17256 }
17257
17258 /* These float cases don't actually occur as immediate operands. */
17259 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17260 {
17261 char dstr[30];
17262
17263 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17264 fputs (dstr, file);
17265 }
17266
17267 else
17268 {
17269 /* We have patterns that allow zero sets of memory, for instance.
17270 In 64-bit mode, we should probably support all 8-byte vectors,
17271 since we can in fact encode that into an immediate. */
17272 if (GET_CODE (x) == CONST_VECTOR)
17273 {
17274 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
17275 x = const0_rtx;
17276 }
17277
17278 if (code != 'P' && code != 'p')
17279 {
17280 if (CONST_INT_P (x))
17281 {
17282 if (ASSEMBLER_DIALECT == ASM_ATT)
17283 putc ('$', file);
17284 }
17285 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17286 || GET_CODE (x) == LABEL_REF)
17287 {
17288 if (ASSEMBLER_DIALECT == ASM_ATT)
17289 putc ('$', file);
17290 else
17291 fputs ("OFFSET FLAT:", file);
17292 }
17293 }
17294 if (CONST_INT_P (x))
17295 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17296 else if (flag_pic || MACHOPIC_INDIRECT)
17297 output_pic_addr_const (file, x, code);
17298 else
17299 output_addr_const (file, x);
17300 }
17301 }
17302
17303 static bool
17304 ix86_print_operand_punct_valid_p (unsigned char code)
17305 {
17306 return (code == '@' || code == '*' || code == '+' || code == '&'
17307 || code == ';' || code == '~' || code == '^' || code == '!');
17308 }
17309 \f
17310 /* Print a memory operand whose address is ADDR. */
17311
17312 static void
17313 ix86_print_operand_address_as (FILE *file, rtx addr,
17314 addr_space_t as, bool no_rip)
17315 {
17316 struct ix86_address parts;
17317 rtx base, index, disp;
17318 int scale;
17319 int ok;
17320 bool vsib = false;
17321 int code = 0;
17322
17323 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
17324 {
17325 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17326 gcc_assert (parts.index == NULL_RTX);
17327 parts.index = XVECEXP (addr, 0, 1);
17328 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
17329 addr = XVECEXP (addr, 0, 0);
17330 vsib = true;
17331 }
17332 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
17333 {
17334 gcc_assert (TARGET_64BIT);
17335 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17336 code = 'q';
17337 }
17338 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
17339 {
17340 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
17341 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
17342 if (parts.base != NULL_RTX)
17343 {
17344 parts.index = parts.base;
17345 parts.scale = 1;
17346 }
17347 parts.base = XVECEXP (addr, 0, 0);
17348 addr = XVECEXP (addr, 0, 0);
17349 }
17350 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
17351 {
17352 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17353 gcc_assert (parts.index == NULL_RTX);
17354 parts.index = XVECEXP (addr, 0, 1);
17355 addr = XVECEXP (addr, 0, 0);
17356 }
17357 else
17358 ok = ix86_decompose_address (addr, &parts);
17359
17360 gcc_assert (ok);
17361
17362 base = parts.base;
17363 index = parts.index;
17364 disp = parts.disp;
17365 scale = parts.scale;
17366
17367 if (ADDR_SPACE_GENERIC_P (as))
17368 as = parts.seg;
17369 else
17370 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
17371
17372 if (!ADDR_SPACE_GENERIC_P (as))
17373 {
17374 const char *string;
17375
17376 if (as == ADDR_SPACE_SEG_TLS)
17377 as = DEFAULT_TLS_SEG_REG;
17378 if (as == ADDR_SPACE_SEG_FS)
17379 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
17380 else if (as == ADDR_SPACE_SEG_GS)
17381 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
17382 else
17383 gcc_unreachable ();
17384 fputs (string, file);
17385 }
17386
17387 /* Use one byte shorter RIP relative addressing for 64bit mode. */
17388 if (TARGET_64BIT && !base && !index && !no_rip)
17389 {
17390 rtx symbol = disp;
17391
17392 if (GET_CODE (disp) == CONST
17393 && GET_CODE (XEXP (disp, 0)) == PLUS
17394 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17395 symbol = XEXP (XEXP (disp, 0), 0);
17396
17397 if (GET_CODE (symbol) == LABEL_REF
17398 || (GET_CODE (symbol) == SYMBOL_REF
17399 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
17400 base = pc_rtx;
17401 }
17402
17403 if (!base && !index)
17404 {
17405 /* A displacement-only address requires special attention. */
17406 if (CONST_INT_P (disp))
17407 {
17408 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC)
17409 fputs ("ds:", file);
17410 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
17411 }
17412 else if (flag_pic)
17413 output_pic_addr_const (file, disp, 0);
17414 else
17415 output_addr_const (file, disp);
17416 }
17417 else
17418 {
17419 /* Print SImode register names to force addr32 prefix. */
17420 if (SImode_address_operand (addr, VOIDmode))
17421 {
17422 if (flag_checking)
17423 {
17424 gcc_assert (TARGET_64BIT);
17425 switch (GET_CODE (addr))
17426 {
17427 case SUBREG:
17428 gcc_assert (GET_MODE (addr) == SImode);
17429 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
17430 break;
17431 case ZERO_EXTEND:
17432 case AND:
17433 gcc_assert (GET_MODE (addr) == DImode);
17434 break;
17435 default:
17436 gcc_unreachable ();
17437 }
17438 }
17439 gcc_assert (!code);
17440 code = 'k';
17441 }
17442 else if (code == 0
17443 && TARGET_X32
17444 && disp
17445 && CONST_INT_P (disp)
17446 && INTVAL (disp) < -16*1024*1024)
17447 {
17448 /* X32 runs in 64-bit mode, where displacement, DISP, in
17449 address DISP(%r64), is encoded as 32-bit immediate sign-
17450 extended from 32-bit to 64-bit. For -0x40000300(%r64),
17451 address is %r64 + 0xffffffffbffffd00. When %r64 <
17452 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
17453 which is invalid for x32. The correct address is %r64
17454 - 0x40000300 == 0xf7ffdd64. To properly encode
17455 -0x40000300(%r64) for x32, we zero-extend negative
17456 displacement by forcing addr32 prefix which truncates
17457 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
17458 zero-extend all negative displacements, including -1(%rsp).
17459 However, for small negative displacements, sign-extension
17460 won't cause overflow. We only zero-extend negative
17461 displacements if they are < -16*1024*1024, which is also the bound
17462 used to check legitimate address displacements for PIC. */
17463 code = 'k';
17464 }
17465
17466 if (ASSEMBLER_DIALECT == ASM_ATT)
17467 {
17468 if (disp)
17469 {
17470 if (flag_pic)
17471 output_pic_addr_const (file, disp, 0);
17472 else if (GET_CODE (disp) == LABEL_REF)
17473 output_asm_label (disp);
17474 else
17475 output_addr_const (file, disp);
17476 }
17477
17478 putc ('(', file);
17479 if (base)
17480 print_reg (base, code, file);
17481 if (index)
17482 {
17483 putc (',', file);
17484 print_reg (index, vsib ? 0 : code, file);
17485 if (scale != 1 || vsib)
17486 fprintf (file, ",%d", scale);
17487 }
17488 putc (')', file);
17489 }
17490 else
17491 {
17492 rtx offset = NULL_RTX;
17493
17494 if (disp)
17495 {
17496 /* Pull out the offset of a symbol; print any symbol itself. */
17497 if (GET_CODE (disp) == CONST
17498 && GET_CODE (XEXP (disp, 0)) == PLUS
17499 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17500 {
17501 offset = XEXP (XEXP (disp, 0), 1);
17502 disp = gen_rtx_CONST (VOIDmode,
17503 XEXP (XEXP (disp, 0), 0));
17504 }
17505
17506 if (flag_pic)
17507 output_pic_addr_const (file, disp, 0);
17508 else if (GET_CODE (disp) == LABEL_REF)
17509 output_asm_label (disp);
17510 else if (CONST_INT_P (disp))
17511 offset = disp;
17512 else
17513 output_addr_const (file, disp);
17514 }
17515
17516 putc ('[', file);
17517 if (base)
17518 {
17519 print_reg (base, code, file);
17520 if (offset)
17521 {
17522 if (INTVAL (offset) >= 0)
17523 putc ('+', file);
17524 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17525 }
17526 }
17527 else if (offset)
17528 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17529 else
17530 putc ('0', file);
17531
17532 if (index)
17533 {
17534 putc ('+', file);
17535 print_reg (index, vsib ? 0 : code, file);
17536 if (scale != 1 || vsib)
17537 fprintf (file, "*%d", scale);
17538 }
17539 putc (']', file);
17540 }
17541 }
17542 }
17543
17544 static void
17545 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
17546 {
17547 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
17548 }
17549
17550 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
17551
17552 static bool
17553 i386_asm_output_addr_const_extra (FILE *file, rtx x)
17554 {
17555 rtx op;
17556
17557 if (GET_CODE (x) != UNSPEC)
17558 return false;
17559
17560 op = XVECEXP (x, 0, 0);
17561 switch (XINT (x, 1))
17562 {
17563 case UNSPEC_GOTTPOFF:
17564 output_addr_const (file, op);
17565 /* FIXME: This might be @TPOFF in Sun ld. */
17566 fputs ("@gottpoff", file);
17567 break;
17568 case UNSPEC_TPOFF:
17569 output_addr_const (file, op);
17570 fputs ("@tpoff", file);
17571 break;
17572 case UNSPEC_NTPOFF:
17573 output_addr_const (file, op);
17574 if (TARGET_64BIT)
17575 fputs ("@tpoff", file);
17576 else
17577 fputs ("@ntpoff", file);
17578 break;
17579 case UNSPEC_DTPOFF:
17580 output_addr_const (file, op);
17581 fputs ("@dtpoff", file);
17582 break;
17583 case UNSPEC_GOTNTPOFF:
17584 output_addr_const (file, op);
17585 if (TARGET_64BIT)
17586 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17587 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
17588 else
17589 fputs ("@gotntpoff", file);
17590 break;
17591 case UNSPEC_INDNTPOFF:
17592 output_addr_const (file, op);
17593 fputs ("@indntpoff", file);
17594 break;
17595 #if TARGET_MACHO
17596 case UNSPEC_MACHOPIC_OFFSET:
17597 output_addr_const (file, op);
17598 putc ('-', file);
17599 machopic_output_function_base_name (file);
17600 break;
17601 #endif
17602
17603 case UNSPEC_STACK_CHECK:
17604 {
17605 int offset;
17606
17607 gcc_assert (flag_split_stack);
17608
17609 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
17610 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
17611 #else
17612 gcc_unreachable ();
17613 #endif
17614
17615 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
17616 }
17617 break;
17618
17619 default:
17620 return false;
17621 }
17622
17623 return true;
17624 }
17625 \f
17626 /* Split one or more double-mode RTL references into pairs of half-mode
17627 references. The RTL can be REG, offsettable MEM, integer constant, or
17628 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
17629 split and "num" is its length. lo_half and hi_half are output arrays
17630 that parallel "operands". */
17631
17632 void
17633 split_double_mode (machine_mode mode, rtx operands[],
17634 int num, rtx lo_half[], rtx hi_half[])
17635 {
17636 machine_mode half_mode;
17637 unsigned int byte;
17638
17639 switch (mode)
17640 {
17641 case TImode:
17642 half_mode = DImode;
17643 break;
17644 case DImode:
17645 half_mode = SImode;
17646 break;
17647 default:
17648 gcc_unreachable ();
17649 }
17650
17651 byte = GET_MODE_SIZE (half_mode);
17652
17653 while (num--)
17654 {
17655 rtx op = operands[num];
17656
17657 /* simplify_subreg refuses to split volatile memory addresses,
17658 but we still have to handle them. */
17659 if (MEM_P (op))
17660 {
17661 lo_half[num] = adjust_address (op, half_mode, 0);
17662 hi_half[num] = adjust_address (op, half_mode, byte);
17663 }
17664 else
17665 {
17666 lo_half[num] = simplify_gen_subreg (half_mode, op,
17667 GET_MODE (op) == VOIDmode
17668 ? mode : GET_MODE (op), 0);
17669 hi_half[num] = simplify_gen_subreg (half_mode, op,
17670 GET_MODE (op) == VOIDmode
17671 ? mode : GET_MODE (op), byte);
17672 }
17673 }
17674 }
17675 \f
17676 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
17677 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
17678 is the expression of the binary operation. The output may either be
17679 emitted here, or returned to the caller, like all output_* functions.
17680
17681 There is no guarantee that the operands are the same mode, as they
17682 might be within FLOAT or FLOAT_EXTEND expressions. */
17683
17684 #ifndef SYSV386_COMPAT
17685 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
17686 wants to fix the assemblers because that causes incompatibility
17687 with gcc. No-one wants to fix gcc because that causes
17688 incompatibility with assemblers... You can use the option of
17689 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
17690 #define SYSV386_COMPAT 1
17691 #endif
17692
17693 const char *
17694 output_387_binary_op (rtx insn, rtx *operands)
17695 {
17696 static char buf[40];
17697 const char *p;
17698 const char *ssep;
17699 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
17700
17701 /* Even if we do not want to check the inputs, this documents the input
17702 constraints, which helps in understanding the following code. */
17703 if (flag_checking)
17704 {
17705 if (STACK_REG_P (operands[0])
17706 && ((REG_P (operands[1])
17707 && REGNO (operands[0]) == REGNO (operands[1])
17708 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
17709 || (REG_P (operands[2])
17710 && REGNO (operands[0]) == REGNO (operands[2])
17711 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
17712 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
17713 ; /* ok */
17714 else
17715 gcc_assert (is_sse);
17716 }
17717
17718 switch (GET_CODE (operands[3]))
17719 {
17720 case PLUS:
17721 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17722 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17723 p = "fiadd";
17724 else
17725 p = "fadd";
17726 ssep = "vadd";
17727 break;
17728
17729 case MINUS:
17730 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17731 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17732 p = "fisub";
17733 else
17734 p = "fsub";
17735 ssep = "vsub";
17736 break;
17737
17738 case MULT:
17739 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17740 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17741 p = "fimul";
17742 else
17743 p = "fmul";
17744 ssep = "vmul";
17745 break;
17746
17747 case DIV:
17748 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17749 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17750 p = "fidiv";
17751 else
17752 p = "fdiv";
17753 ssep = "vdiv";
17754 break;
17755
17756 default:
17757 gcc_unreachable ();
17758 }
17759
17760 if (is_sse)
17761 {
17762 if (TARGET_AVX)
17763 {
17764 strcpy (buf, ssep);
17765 if (GET_MODE (operands[0]) == SFmode)
17766 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
17767 else
17768 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
17769 }
17770 else
17771 {
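/* Drop the leading 'v' of the AVX mnemonic to get the legacy SSE form
(e.g. "vadd" -> "add", giving "addss"/"addsd" after the suffix). */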
17772 strcpy (buf, ssep + 1);
17773 if (GET_MODE (operands[0]) == SFmode)
17774 strcat (buf, "ss\t{%2, %0|%0, %2}");
17775 else
17776 strcat (buf, "sd\t{%2, %0|%0, %2}");
17777 }
17778 return buf;
17779 }
17780 strcpy (buf, p);
17781
17782 switch (GET_CODE (operands[3]))
17783 {
17784 case MULT:
17785 case PLUS:
17786 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
17787 std::swap (operands[1], operands[2]);
17788
17789 /* We know that operands[0] == operands[1] here. */
17790
17791 if (MEM_P (operands[2]))
17792 {
17793 p = "%Z2\t%2";
17794 break;
17795 }
17796
17797 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17798 {
17799 if (STACK_TOP_P (operands[0]))
17800 /* How is it that we are storing to a dead operand[2]?
17801 Well, presumably operands[1] is dead too. We can't
17802 store the result to st(0) as st(0) gets popped on this
17803 instruction. Instead store to operands[2] (which I
17804 think has to be st(1)). st(1) will be popped later.
17805 gcc <= 2.8.1 didn't have this check and generated
17806 assembly code that the Unixware assembler rejected. */
17807 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17808 else
17809 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17810 break;
17811 }
17812
17813 if (STACK_TOP_P (operands[0]))
17814 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17815 else
17816 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17817 break;
17818
17819 case MINUS:
17820 case DIV:
17821 if (MEM_P (operands[1]))
17822 {
17823 p = "r%Z1\t%1";
17824 break;
17825 }
17826
17827 if (MEM_P (operands[2]))
17828 {
17829 p = "%Z2\t%2";
17830 break;
17831 }
17832
17833 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17834 {
17835 #if SYSV386_COMPAT
17836 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
17837 derived assemblers, confusingly reverse the direction of
17838 the operation for fsub{r} and fdiv{r} when the
17839 destination register is not st(0). The Intel assembler
17840 doesn't have this brain damage. Read !SYSV386_COMPAT to
17841 figure out what the hardware really does. */
17842 if (STACK_TOP_P (operands[0]))
17843 p = "{p\t%0, %2|rp\t%2, %0}";
17844 else
17845 p = "{rp\t%2, %0|p\t%0, %2}";
17846 #else
17847 if (STACK_TOP_P (operands[0]))
17848 /* As above for fmul/fadd, we can't store to st(0). */
17849 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17850 else
17851 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17852 #endif
17853 break;
17854 }
17855
17856 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17857 {
17858 #if SYSV386_COMPAT
17859 if (STACK_TOP_P (operands[0]))
17860 p = "{rp\t%0, %1|p\t%1, %0}";
17861 else
17862 p = "{p\t%1, %0|rp\t%0, %1}";
17863 #else
17864 if (STACK_TOP_P (operands[0]))
17865 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
17866 else
17867 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
17868 #endif
17869 break;
17870 }
17871
17872 if (STACK_TOP_P (operands[0]))
17873 {
17874 if (STACK_TOP_P (operands[1]))
17875 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17876 else
17877 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
17878 break;
17879 }
17880 else if (STACK_TOP_P (operands[1]))
17881 {
17882 #if SYSV386_COMPAT
17883 p = "{\t%1, %0|r\t%0, %1}";
17884 #else
17885 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
17886 #endif
17887 }
17888 else
17889 {
17890 #if SYSV386_COMPAT
17891 p = "{r\t%2, %0|\t%0, %2}";
17892 #else
17893 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17894 #endif
17895 }
17896 break;
17897
17898 default:
17899 gcc_unreachable ();
17900 }
17901
17902 strcat (buf, p);
17903 return buf;
17904 }
17905
17906 /* Check if a 256bit AVX register is referenced inside of EXP. */
17907
17908 static bool
17909 ix86_check_avx256_register (const_rtx exp)
17910 {
17911 if (SUBREG_P (exp))
17912 exp = SUBREG_REG (exp);
17913
17914 return (REG_P (exp)
17915 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
17916 }
17917
17918 /* Return needed mode for entity in optimize_mode_switching pass. */
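/* (The AVX_U128 entity tracks whether the upper halves of the 256bit
registers may be dirty, so that vzeroupper can be emitted where needed
to avoid AVX/SSE transition penalties.) */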
17919
17920 static int
17921 ix86_avx_u128_mode_needed (rtx_insn *insn)
17922 {
17923 if (CALL_P (insn))
17924 {
17925 rtx link;
17926
17927 /* Needed mode is set to AVX_U128_CLEAN if there are
17928 no 256bit modes used in function arguments. */
17929 for (link = CALL_INSN_FUNCTION_USAGE (insn);
17930 link;
17931 link = XEXP (link, 1))
17932 {
17933 if (GET_CODE (XEXP (link, 0)) == USE)
17934 {
17935 rtx arg = XEXP (XEXP (link, 0), 0);
17936
17937 if (ix86_check_avx256_register (arg))
17938 return AVX_U128_DIRTY;
17939 }
17940 }
17941
17942 return AVX_U128_CLEAN;
17943 }
17944
17945 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
17946 changes state only when a 256bit register is written to, but we need
17947 to prevent the compiler from moving the optimal insertion point above
17948 an eventual read from a 256bit register. */
17949 subrtx_iterator::array_type array;
17950 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
17951 if (ix86_check_avx256_register (*iter))
17952 return AVX_U128_DIRTY;
17953
17954 return AVX_U128_ANY;
17955 }
17956
17957 /* Return mode that i387 must be switched into
17958 prior to the execution of insn. */
17959
17960 static int
17961 ix86_i387_mode_needed (int entity, rtx_insn *insn)
17962 {
17963 enum attr_i387_cw mode;
17964
17965 /* The mode UNINITIALIZED is used to store the control word after a
17966 function call or ASM pattern. The mode ANY specifies that the function
17967 has no requirements on the control word and makes no changes to the
17968 bits we are interested in. */
17969
17970 if (CALL_P (insn)
17971 || (NONJUMP_INSN_P (insn)
17972 && (asm_noperands (PATTERN (insn)) >= 0
17973 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
17974 return I387_CW_UNINITIALIZED;
17975
17976 if (recog_memoized (insn) < 0)
17977 return I387_CW_ANY;
17978
17979 mode = get_attr_i387_cw (insn);
17980
17981 switch (entity)
17982 {
17983 case I387_TRUNC:
17984 if (mode == I387_CW_TRUNC)
17985 return mode;
17986 break;
17987
17988 case I387_FLOOR:
17989 if (mode == I387_CW_FLOOR)
17990 return mode;
17991 break;
17992
17993 case I387_CEIL:
17994 if (mode == I387_CW_CEIL)
17995 return mode;
17996 break;
17997
17998 case I387_MASK_PM:
17999 if (mode == I387_CW_MASK_PM)
18000 return mode;
18001 break;
18002
18003 default:
18004 gcc_unreachable ();
18005 }
18006
18007 return I387_CW_ANY;
18008 }
18009
18010 /* Return mode that entity must be switched into
18011 prior to the execution of insn. */
18012
18013 static int
18014 ix86_mode_needed (int entity, rtx_insn *insn)
18015 {
18016 switch (entity)
18017 {
18018 case AVX_U128:
18019 return ix86_avx_u128_mode_needed (insn);
18020 case I387_TRUNC:
18021 case I387_FLOOR:
18022 case I387_CEIL:
18023 case I387_MASK_PM:
18024 return ix86_i387_mode_needed (entity, insn);
18025 default:
18026 gcc_unreachable ();
18027 }
18028 return 0;
18029 }
18030
18031 /* Check if a 256bit AVX register is referenced in stores. */
18032
18033 static void
18034 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18035 {
18036 if (ix86_check_avx256_register (dest))
18037 {
18038 bool *used = (bool *) data;
18039 *used = true;
18040 }
18041 }
18042
18043 /* Calculate mode of upper 128bit AVX registers after the insn. */
18044
18045 static int
18046 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18047 {
18048 rtx pat = PATTERN (insn);
18049
18050 if (vzeroupper_operation (pat, VOIDmode)
18051 || vzeroall_operation (pat, VOIDmode))
18052 return AVX_U128_CLEAN;
18053
18054 /* We know that the state is clean after a CALL insn if no
18055 256bit register is used in the function return register. */
18056 if (CALL_P (insn))
18057 {
18058 bool avx_reg256_found = false;
18059 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18060
18061 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18062 }
18063
18064 /* Otherwise, return the current mode. Remember that if the insn
18065 references AVX 256bit registers, the mode was already changed
18066 to DIRTY by MODE_NEEDED. */
18067 return mode;
18068 }
18069
18070 /* Return the mode that an insn results in. */
18071
18072 static int
18073 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18074 {
18075 switch (entity)
18076 {
18077 case AVX_U128:
18078 return ix86_avx_u128_mode_after (mode, insn);
18079 case I387_TRUNC:
18080 case I387_FLOOR:
18081 case I387_CEIL:
18082 case I387_MASK_PM:
18083 return mode;
18084 default:
18085 gcc_unreachable ();
18086 }
18087 }
18088
18089 static int
18090 ix86_avx_u128_mode_entry (void)
18091 {
18092 tree arg;
18093
18094 /* Entry mode is set to AVX_U128_DIRTY if there are
18095 256bit modes used in function arguments. */
18096 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18097 arg = TREE_CHAIN (arg))
18098 {
18099 rtx incoming = DECL_INCOMING_RTL (arg);
18100
18101 if (incoming && ix86_check_avx256_register (incoming))
18102 return AVX_U128_DIRTY;
18103 }
18104
18105 return AVX_U128_CLEAN;
18106 }
18107
18108 /* Return a mode that ENTITY is assumed to be
18109 switched to at function entry. */
18110
18111 static int
18112 ix86_mode_entry (int entity)
18113 {
18114 switch (entity)
18115 {
18116 case AVX_U128:
18117 return ix86_avx_u128_mode_entry ();
18118 case I387_TRUNC:
18119 case I387_FLOOR:
18120 case I387_CEIL:
18121 case I387_MASK_PM:
18122 return I387_CW_ANY;
18123 default:
18124 gcc_unreachable ();
18125 }
18126 }
18127
18128 static int
18129 ix86_avx_u128_mode_exit (void)
18130 {
18131 rtx reg = crtl->return_rtx;
18132
18133 /* Exit mode is set to AVX_U128_DIRTY if there are
18134 256bit modes used in the function return register. */
18135 if (reg && ix86_check_avx256_register (reg))
18136 return AVX_U128_DIRTY;
18137
18138 return AVX_U128_CLEAN;
18139 }
18140
18141 /* Return a mode that ENTITY is assumed to be
18142 switched to at function exit. */
18143
18144 static int
18145 ix86_mode_exit (int entity)
18146 {
18147 switch (entity)
18148 {
18149 case AVX_U128:
18150 return ix86_avx_u128_mode_exit ();
18151 case I387_TRUNC:
18152 case I387_FLOOR:
18153 case I387_CEIL:
18154 case I387_MASK_PM:
18155 return I387_CW_ANY;
18156 default:
18157 gcc_unreachable ();
18158 }
18159 }
18160
18161 static int
18162 ix86_mode_priority (int, int n)
18163 {
18164 return n;
18165 }
18166
18167 /* Output code to initialize the control word copies used by the trunc?f?i
18168 and rounding patterns. MODE selects the required rounding mode; the
18169 current control word is saved and the modified copy stored in a stack slot. */
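/* (In the x87 control word, bits 10-11 form the rounding control field:
00 = round to nearest, 01 = round down, 10 = round up, 11 = truncate;
bit 5 masks the precision exception. Hence the 0x0c00, 0x0400, 0x0800
and 0x0020 masks used below.) */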
18170
18171 static void
18172 emit_i387_cw_initialization (int mode)
18173 {
18174 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18175 rtx new_mode;
18176
18177 enum ix86_stack_slot slot;
18178
18179 rtx reg = gen_reg_rtx (HImode);
18180
18181 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18182 emit_move_insn (reg, copy_rtx (stored_mode));
18183
18184 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18185 || optimize_insn_for_size_p ())
18186 {
18187 switch (mode)
18188 {
18189 case I387_CW_TRUNC:
18190 /* round toward zero (truncate) */
18191 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18192 slot = SLOT_CW_TRUNC;
18193 break;
18194
18195 case I387_CW_FLOOR:
18196 /* round down toward -oo */
18197 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18198 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18199 slot = SLOT_CW_FLOOR;
18200 break;
18201
18202 case I387_CW_CEIL:
18203 /* round up toward +oo */
18204 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18205 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18206 slot = SLOT_CW_CEIL;
18207 break;
18208
18209 case I387_CW_MASK_PM:
18210 /* mask precision exception for nearbyint() */
18211 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18212 slot = SLOT_CW_MASK_PM;
18213 break;
18214
18215 default:
18216 gcc_unreachable ();
18217 }
18218 }
18219 else
18220 {
18221 switch (mode)
18222 {
18223 case I387_CW_TRUNC:
18224 /* round toward zero (truncate) */
18225 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18226 slot = SLOT_CW_TRUNC;
18227 break;
18228
18229 case I387_CW_FLOOR:
18230 /* round down toward -oo */
18231 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18232 slot = SLOT_CW_FLOOR;
18233 break;
18234
18235 case I387_CW_CEIL:
18236 /* round up toward +oo */
18237 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18238 slot = SLOT_CW_CEIL;
18239 break;
18240
18241 case I387_CW_MASK_PM:
18242 /* mask precision exception for nearbyint() */
18243 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18244 slot = SLOT_CW_MASK_PM;
18245 break;
18246
18247 default:
18248 gcc_unreachable ();
18249 }
18250 }
18251
18252 gcc_assert (slot < MAX_386_STACK_LOCALS);
18253
18254 new_mode = assign_386_stack_local (HImode, slot);
18255 emit_move_insn (new_mode, reg);
18256 }
18257
18258 /* Emit vzeroupper. */
18259
18260 void
18261 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18262 {
18263 int i;
18264
18265 /* Cancel automatic vzeroupper insertion if there are
18266 live call-saved SSE registers at the insertion point. */
18267
18268 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18269 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18270 return;
18271
18272 if (TARGET_64BIT)
18273 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18274 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18275 return;
18276
18277 emit_insn (gen_avx_vzeroupper ());
18278 }
18279
18282 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
18283 is the set of hard registers live at the point where the insn(s)
18284 are to be inserted. */
18285
18286 static void
18287 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18288 HARD_REG_SET regs_live)
18289 {
18290 switch (entity)
18291 {
18292 case AVX_U128:
18293 if (mode == AVX_U128_CLEAN)
18294 ix86_avx_emit_vzeroupper (regs_live);
18295 break;
18296 case I387_TRUNC:
18297 case I387_FLOOR:
18298 case I387_CEIL:
18299 case I387_MASK_PM:
18300 if (mode != I387_CW_ANY
18301 && mode != I387_CW_UNINITIALIZED)
18302 emit_i387_cw_initialization (mode);
18303 break;
18304 default:
18305 gcc_unreachable ();
18306 }
18307 }
18308
18309 /* Output code for INSN to convert a float to a signed int. OPERANDS
18310 are the insn operands. The output may be [HSD]Imode and the input
18311 operand may be [SDX]Fmode. */
18312
18313 const char *
18314 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
18315 {
18316 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18317 int dimode_p = GET_MODE (operands[0]) == DImode;
18318 int round_mode = get_attr_i387_cw (insn);
18319
18320 /* Jump through a hoop or two for DImode, since the hardware has no
18321 non-popping instruction. We used to do this a different way, but
18322 that was somewhat fragile and broke with post-reload splitters. */
18323 if ((dimode_p || fisttp) && !stack_top_dies)
18324 output_asm_insn ("fld\t%y1", operands);
18325
18326 gcc_assert (STACK_TOP_P (operands[1]));
18327 gcc_assert (MEM_P (operands[0]));
18328 gcc_assert (GET_MODE (operands[1]) != TFmode);
18329
18330 if (fisttp)
18331 output_asm_insn ("fisttp%Z0\t%0", operands);
18332 else
18333 {
18334 if (round_mode != I387_CW_ANY)
18335 output_asm_insn ("fldcw\t%3", operands);
18336 if (stack_top_dies || dimode_p)
18337 output_asm_insn ("fistp%Z0\t%0", operands);
18338 else
18339 output_asm_insn ("fist%Z0\t%0", operands);
18340 if (round_mode != I387_CW_ANY)
18341 output_asm_insn ("fldcw\t%2", operands);
18342 }
18343
18344 return "";
18345 }
18346
18347 /* Output code for x87 ffreep insn. The OPNO argument, which may only
18348 have the values zero or one, indicates the ffreep insn's operand
18349 from the OPERANDS array. */
18350
18351 static const char *
18352 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
18353 {
18354 if (TARGET_USE_FFREEP)
18355 #ifdef HAVE_AS_IX86_FFREEP
18356 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
18357 #else
18358 {
18359 static char retval[32];
18360 int regno = REGNO (operands[opno]);
18361
18362 gcc_assert (STACK_REGNO_P (regno));
18363
18364 regno -= FIRST_STACK_REG;
18365
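/* (ffreep %st(n) encodes as the two bytes DF C0+n; emitting the
little-endian 16-bit word 0xc<n>df produces exactly those bytes
when the assembler does not know the mnemonic.) */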
18366 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
18367 return retval;
18368 }
18369 #endif
18370
18371 return opno ? "fstp\t%y1" : "fstp\t%y0";
18372 }
18373
18374
18375 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
18376 should be used. UNORDERED_P is true when fucom should be used. */
18377
18378 const char *
18379 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
18380 {
18381 int stack_top_dies;
18382 rtx cmp_op0, cmp_op1;
18383 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
18384
18385 if (eflags_p)
18386 {
18387 cmp_op0 = operands[0];
18388 cmp_op1 = operands[1];
18389 }
18390 else
18391 {
18392 cmp_op0 = operands[1];
18393 cmp_op1 = operands[2];
18394 }
18395
18396 if (is_sse)
18397 {
18398 if (GET_MODE (operands[0]) == SFmode)
18399 if (unordered_p)
18400 return "%vucomiss\t{%1, %0|%0, %1}";
18401 else
18402 return "%vcomiss\t{%1, %0|%0, %1}";
18403 else
18404 if (unordered_p)
18405 return "%vucomisd\t{%1, %0|%0, %1}";
18406 else
18407 return "%vcomisd\t{%1, %0|%0, %1}";
18408 }
18409
18410 gcc_assert (STACK_TOP_P (cmp_op0));
18411
18412 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18413
18414 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
18415 {
18416 if (stack_top_dies)
18417 {
18418 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
18419 return output_387_ffreep (operands, 1);
18420 }
18421 else
18422 return "ftst\n\tfnstsw\t%0";
18423 }
18424
18425 if (STACK_REG_P (cmp_op1)
18426 && stack_top_dies
18427 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
18428 && REGNO (cmp_op1) != FIRST_STACK_REG)
18429 {
18430 /* If the top of the 387 stack dies, and the other operand
18431 is also a stack register that dies, then this must be a
18432 `fcompp' float compare. */
18433
18434 if (eflags_p)
18435 {
18436 /* There is no double popping fcomi variant. Fortunately,
18437 eflags is immune from the fstp's cc clobbering. */
18438 if (unordered_p)
18439 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
18440 else
18441 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
18442 return output_387_ffreep (operands, 0);
18443 }
18444 else
18445 {
18446 if (unordered_p)
18447 return "fucompp\n\tfnstsw\t%0";
18448 else
18449 return "fcompp\n\tfnstsw\t%0";
18450 }
18451 }
18452 else
18453 {
18454 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
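/* (The table index is (eflags_p << 3) | (intmode << 2) | (unordered_p << 1)
| stack_top_dies; e.g. index 11 (binary 1011) selects "fucomip".) */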
18455
18456 static const char * const alt[16] =
18457 {
18458 "fcom%Z2\t%y2\n\tfnstsw\t%0",
18459 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
18460 "fucom%Z2\t%y2\n\tfnstsw\t%0",
18461 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
18462
18463 "ficom%Z2\t%y2\n\tfnstsw\t%0",
18464 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
18465 NULL,
18466 NULL,
18467
18468 "fcomi\t{%y1, %0|%0, %y1}",
18469 "fcomip\t{%y1, %0|%0, %y1}",
18470 "fucomi\t{%y1, %0|%0, %y1}",
18471 "fucomip\t{%y1, %0|%0, %y1}",
18472
18473 NULL,
18474 NULL,
18475 NULL,
18476 NULL
18477 };
18478
18479 int mask;
18480 const char *ret;
18481
18482 mask = eflags_p << 3;
18483 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
18484 mask |= unordered_p << 1;
18485 mask |= stack_top_dies;
18486
18487 gcc_assert (mask < 16);
18488 ret = alt[mask];
18489 gcc_assert (ret);
18490
18491 return ret;
18492 }
18493 }
18494
18495 void
18496 ix86_output_addr_vec_elt (FILE *file, int value)
18497 {
18498 const char *directive = ASM_LONG;
18499
18500 #ifdef ASM_QUAD
18501 if (TARGET_LP64)
18502 directive = ASM_QUAD;
18503 #else
18504 gcc_assert (!TARGET_64BIT);
18505 #endif
18506
18507 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
18508 }
18509
18510 void
18511 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
18512 {
18513 const char *directive = ASM_LONG;
18514
18515 #ifdef ASM_QUAD
18516 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
18517 directive = ASM_QUAD;
18518 #else
18519 gcc_assert (!TARGET_64BIT);
18520 #endif
18521 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
18522 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18523 fprintf (file, "%s%s%d-%s%d\n",
18524 directive, LPREFIX, value, LPREFIX, rel);
18525 else if (HAVE_AS_GOTOFF_IN_DATA)
18526 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
18527 #if TARGET_MACHO
18528 else if (TARGET_MACHO)
18529 {
18530 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
18531 machopic_output_function_base_name (file);
18532 putc ('\n', file);
18533 }
18534 #endif
18535 else
18536 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
18537 GOT_SYMBOL_NAME, LPREFIX, value);
18538 }
18539 \f
18540 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
18541 for the target. */
18542
18543 void
18544 ix86_expand_clear (rtx dest)
18545 {
18546 rtx tmp;
18547
18548 /* We play register width games, which are only valid after reload. */
18549 gcc_assert (reload_completed);
18550
18551 /* Avoid HImode and its attendant prefix byte. */
18552 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
18553 dest = gen_rtx_REG (SImode, REGNO (dest));
18554 tmp = gen_rtx_SET (dest, const0_rtx);
18555
18556 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
18557 {
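/* The xor form clobbers the flags, so wrap the SET in a PARALLEL with
an explicit FLAGS_REG clobber; a mov $0 would not need one. */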
18558 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18559 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
18560 }
18561
18562 emit_insn (tmp);
18563 }
18564
18565 /* X is an unchanging MEM. If it is a constant pool reference, return
18566 the constant pool rtx, else NULL. */
18567
18568 rtx
18569 maybe_get_pool_constant (rtx x)
18570 {
18571 x = ix86_delegitimize_address (XEXP (x, 0));
18572
18573 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
18574 return get_pool_constant (x);
18575
18576 return NULL_RTX;
18577 }
18578
18579 void
18580 ix86_expand_move (machine_mode mode, rtx operands[])
18581 {
18582 rtx op0, op1;
18583 enum tls_model model;
18584
18585 op0 = operands[0];
18586 op1 = operands[1];
18587
18588 if (GET_CODE (op1) == SYMBOL_REF)
18589 {
18590 rtx tmp;
18591
18592 model = SYMBOL_REF_TLS_MODEL (op1);
18593 if (model)
18594 {
18595 op1 = legitimize_tls_address (op1, model, true);
18596 op1 = force_operand (op1, op0);
18597 if (op1 == op0)
18598 return;
18599 op1 = convert_to_mode (mode, op1, 1);
18600 }
18601 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
18602 op1 = tmp;
18603 }
18604 else if (GET_CODE (op1) == CONST
18605 && GET_CODE (XEXP (op1, 0)) == PLUS
18606 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
18607 {
18608 rtx addend = XEXP (XEXP (op1, 0), 1);
18609 rtx symbol = XEXP (XEXP (op1, 0), 0);
18610 rtx tmp;
18611
18612 model = SYMBOL_REF_TLS_MODEL (symbol);
18613 if (model)
18614 tmp = legitimize_tls_address (symbol, model, true);
18615 else
18616 tmp = legitimize_pe_coff_symbol (symbol, true);
18617
18618 if (tmp)
18619 {
18620 tmp = force_operand (tmp, NULL);
18621 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
18622 op0, 1, OPTAB_DIRECT);
18623 if (tmp == op0)
18624 return;
18625 op1 = convert_to_mode (mode, tmp, 1);
18626 }
18627 }
18628
18629 if ((flag_pic || MACHOPIC_INDIRECT)
18630 && symbolic_operand (op1, mode))
18631 {
18632 if (TARGET_MACHO && !TARGET_64BIT)
18633 {
18634 #if TARGET_MACHO
18635 /* dynamic-no-pic */
18636 if (MACHOPIC_INDIRECT)
18637 {
18638 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
18639 ? op0 : gen_reg_rtx (Pmode);
18640 op1 = machopic_indirect_data_reference (op1, temp);
18641 if (MACHOPIC_PURE)
18642 op1 = machopic_legitimize_pic_address (op1, mode,
18643 temp == op1 ? 0 : temp);
18644 }
18645 if (op0 != op1 && GET_CODE (op0) != MEM)
18646 {
18647 rtx insn = gen_rtx_SET (op0, op1);
18648 emit_insn (insn);
18649 return;
18650 }
18651 if (GET_CODE (op0) == MEM)
18652 op1 = force_reg (Pmode, op1);
18653 else
18654 {
18655 rtx temp = op0;
18656 if (GET_CODE (temp) != REG)
18657 temp = gen_reg_rtx (Pmode);
18658 temp = legitimize_pic_address (op1, temp);
18659 if (temp == op0)
18660 return;
18661 op1 = temp;
18662 }
18663 /* dynamic-no-pic */
18664 #endif
18665 }
18666 else
18667 {
18668 if (MEM_P (op0))
18669 op1 = force_reg (mode, op1);
18670 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
18671 {
18672 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
18673 op1 = legitimize_pic_address (op1, reg);
18674 if (op0 == op1)
18675 return;
18676 op1 = convert_to_mode (mode, op1, 1);
18677 }
18678 }
18679 }
18680 else
18681 {
18682 if (MEM_P (op0)
18683 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
18684 || !push_operand (op0, mode))
18685 && MEM_P (op1))
18686 op1 = force_reg (mode, op1);
18687
18688 if (push_operand (op0, mode)
18689 && ! general_no_elim_operand (op1, mode))
18690 op1 = copy_to_mode_reg (mode, op1);
18691
18692 /* Force large constants in 64bit compilation into a register
18693 to get them CSEed. */
18694 if (can_create_pseudo_p ()
18695 && (mode == DImode) && TARGET_64BIT
18696 && immediate_operand (op1, mode)
18697 && !x86_64_zext_immediate_operand (op1, VOIDmode)
18698 && !register_operand (op0, mode)
18699 && optimize)
18700 op1 = copy_to_mode_reg (mode, op1);
18701
18702 if (can_create_pseudo_p ()
18703 && CONST_DOUBLE_P (op1))
18704 {
18705 /* If we are loading a floating point constant to a register,
18706 force the value to memory now, since we'll get better code
18707 out the back end. */
18708
18709 op1 = validize_mem (force_const_mem (mode, op1));
18710 if (!register_operand (op0, mode))
18711 {
18712 rtx temp = gen_reg_rtx (mode);
18713 emit_insn (gen_rtx_SET (temp, op1));
18714 emit_move_insn (op0, temp);
18715 return;
18716 }
18717 }
18718 }
18719
18720 emit_insn (gen_rtx_SET (op0, op1));
18721 }
18722
18723 void
18724 ix86_expand_vector_move (machine_mode mode, rtx operands[])
18725 {
18726 rtx op0 = operands[0], op1 = operands[1];
18727 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for the IA MCU
18728 psABI, since the biggest alignment there is 4 bytes. */
18729 unsigned int align = (TARGET_IAMCU
18730 ? GET_MODE_BITSIZE (mode)
18731 : GET_MODE_ALIGNMENT (mode));
18732
18733 if (push_operand (op0, VOIDmode))
18734 op0 = emit_move_resolve_push (mode, op0);
18735
18736 /* Force constants other than zero into memory. We do not know how
18737 the instructions used to build constants modify the upper 64 bits
18738 of the register; once we have that information we may be able
18739 to handle some of them more efficiently. */
18740 if (can_create_pseudo_p ()
18741 && register_operand (op0, mode)
18742 && (CONSTANT_P (op1)
18743 || (SUBREG_P (op1)
18744 && CONSTANT_P (SUBREG_REG (op1))))
18745 && !standard_sse_constant_p (op1))
18746 op1 = validize_mem (force_const_mem (mode, op1));
18747
18748 /* We need to check memory alignment for SSE modes since an attribute
18749 can make operands unaligned. */
18750 if (can_create_pseudo_p ()
18751 && SSE_REG_MODE_P (mode)
18752 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
18753 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
18754 {
18755 rtx tmp[2];
18756
18757 /* ix86_expand_vector_move_misalign() does not like constants ... */
18758 if (CONSTANT_P (op1)
18759 || (SUBREG_P (op1)
18760 && CONSTANT_P (SUBREG_REG (op1))))
18761 op1 = validize_mem (force_const_mem (mode, op1));
18762
18763 /* ... nor both arguments in memory. */
18764 if (!register_operand (op0, mode)
18765 && !register_operand (op1, mode))
18766 op1 = force_reg (mode, op1);
18767
18768 tmp[0] = op0; tmp[1] = op1;
18769 ix86_expand_vector_move_misalign (mode, tmp);
18770 return;
18771 }
18772
18773 /* Make operand1 a register if it isn't already. */
18774 if (can_create_pseudo_p ()
18775 && !register_operand (op0, mode)
18776 && !register_operand (op1, mode))
18777 {
18778 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
18779 return;
18780 }
18781
18782 emit_insn (gen_rtx_SET (op0, op1));
18783 }
18784
18785 /* Split 32-byte AVX unaligned load and store if needed. */
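/* (When the target prefers split accesses, a misaligned 256bit load is
done as two 128bit halves merged with VEC_CONCAT, and a misaligned
store as two vextractf128 stores of the low and high halves.) */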
18786
18787 static void
18788 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
18789 {
18790 rtx m;
18791 rtx (*extract) (rtx, rtx, rtx);
18792 rtx (*load_unaligned) (rtx, rtx);
18793 rtx (*store_unaligned) (rtx, rtx);
18794 machine_mode mode;
18795
18796 switch (GET_MODE (op0))
18797 {
18798 default:
18799 gcc_unreachable ();
18800 case V32QImode:
18801 extract = gen_avx_vextractf128v32qi;
18802 load_unaligned = gen_avx_loaddquv32qi;
18803 store_unaligned = gen_avx_storedquv32qi;
18804 mode = V16QImode;
18805 break;
18806 case V8SFmode:
18807 extract = gen_avx_vextractf128v8sf;
18808 load_unaligned = gen_avx_loadups256;
18809 store_unaligned = gen_avx_storeups256;
18810 mode = V4SFmode;
18811 break;
18812 case V4DFmode:
18813 extract = gen_avx_vextractf128v4df;
18814 load_unaligned = gen_avx_loadupd256;
18815 store_unaligned = gen_avx_storeupd256;
18816 mode = V2DFmode;
18817 break;
18818 }
18819
18820 if (MEM_P (op1))
18821 {
18822 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
18823 && optimize_insn_for_speed_p ())
18824 {
18825 rtx r = gen_reg_rtx (mode);
18826 m = adjust_address (op1, mode, 0);
18827 emit_move_insn (r, m);
18828 m = adjust_address (op1, mode, 16);
18829 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
18830 emit_move_insn (op0, r);
18831 }
18832 /* Normal *mov<mode>_internal pattern will handle
18833 unaligned loads just fine if misaligned_operand
18834 is true, and without the UNSPEC it can be combined
18835 with arithmetic instructions. */
18836 else if (misaligned_operand (op1, GET_MODE (op1)))
18837 emit_insn (gen_rtx_SET (op0, op1));
18838 else
18839 emit_insn (load_unaligned (op0, op1));
18840 }
18841 else if (MEM_P (op0))
18842 {
18843 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
18844 && optimize_insn_for_speed_p ())
18845 {
18846 m = adjust_address (op0, mode, 0);
18847 emit_insn (extract (m, op1, const0_rtx));
18848 m = adjust_address (op0, mode, 16);
18849 emit_insn (extract (m, op1, const1_rtx));
18850 }
18851 else
18852 emit_insn (store_unaligned (op0, op1));
18853 }
18854 else
18855 gcc_unreachable ();
18856 }
18857
18858 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
18859 straight to ix86_expand_vector_move. */
18860 /* Code generation for scalar reg-reg moves of single and double precision data:
18861 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
18862 movaps reg, reg
18863 else
18864 movss reg, reg
18865 if (x86_sse_partial_reg_dependency == true)
18866 movapd reg, reg
18867 else
18868 movsd reg, reg
18869
18870 Code generation for scalar loads of double precision data:
18871 if (x86_sse_split_regs == true)
18872 movlpd mem, reg (gas syntax)
18873 else
18874 movsd mem, reg
18875
18876 Code generation for unaligned packed loads of single precision data
18877 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
18878 if (x86_sse_unaligned_move_optimal)
18879 movups mem, reg
18880
18881 if (x86_sse_partial_reg_dependency == true)
18882 {
18883 xorps reg, reg
18884 movlps mem, reg
18885 movhps mem+8, reg
18886 }
18887 else
18888 {
18889 movlps mem, reg
18890 movhps mem+8, reg
18891 }
18892
18893 Code generation for unaligned packed loads of double precision data
18894 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
18895 if (x86_sse_unaligned_move_optimal)
18896 movupd mem, reg
18897
18898 if (x86_sse_split_regs == true)
18899 {
18900 movlpd mem, reg
18901 movhpd mem+8, reg
18902 }
18903 else
18904 {
18905 movsd mem, reg
18906 movhpd mem+8, reg
18907 }
18908 */
18909
18910 void
18911 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
18912 {
18913 rtx op0, op1, orig_op0 = NULL_RTX, m;
18914 rtx (*load_unaligned) (rtx, rtx);
18915 rtx (*store_unaligned) (rtx, rtx);
18916
18917 op0 = operands[0];
18918 op1 = operands[1];
18919
18920 if (GET_MODE_SIZE (mode) == 64)
18921 {
18922 switch (GET_MODE_CLASS (mode))
18923 {
18924 case MODE_VECTOR_INT:
18925 case MODE_INT:
18926 if (GET_MODE (op0) != V16SImode)
18927 {
18928 if (!MEM_P (op0))
18929 {
18930 orig_op0 = op0;
18931 op0 = gen_reg_rtx (V16SImode);
18932 }
18933 else
18934 op0 = gen_lowpart (V16SImode, op0);
18935 }
18936 op1 = gen_lowpart (V16SImode, op1);
18937 /* FALLTHRU */
18938
18939 case MODE_VECTOR_FLOAT:
18940 switch (GET_MODE (op0))
18941 {
18942 default:
18943 gcc_unreachable ();
18944 case V16SImode:
18945 load_unaligned = gen_avx512f_loaddquv16si;
18946 store_unaligned = gen_avx512f_storedquv16si;
18947 break;
18948 case V16SFmode:
18949 load_unaligned = gen_avx512f_loadups512;
18950 store_unaligned = gen_avx512f_storeups512;
18951 break;
18952 case V8DFmode:
18953 load_unaligned = gen_avx512f_loadupd512;
18954 store_unaligned = gen_avx512f_storeupd512;
18955 break;
18956 }
18957
18958 if (MEM_P (op1))
18959 emit_insn (load_unaligned (op0, op1));
18960 else if (MEM_P (op0))
18961 emit_insn (store_unaligned (op0, op1));
18962 else
18963 gcc_unreachable ();
18964 if (orig_op0)
18965 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18966 break;
18967
18968 default:
18969 gcc_unreachable ();
18970 }
18971
18972 return;
18973 }
18974
18975 if (TARGET_AVX
18976 && GET_MODE_SIZE (mode) == 32)
18977 {
18978 switch (GET_MODE_CLASS (mode))
18979 {
18980 case MODE_VECTOR_INT:
18981 case MODE_INT:
18982 if (GET_MODE (op0) != V32QImode)
18983 {
18984 if (!MEM_P (op0))
18985 {
18986 orig_op0 = op0;
18987 op0 = gen_reg_rtx (V32QImode);
18988 }
18989 else
18990 op0 = gen_lowpart (V32QImode, op0);
18991 }
18992 op1 = gen_lowpart (V32QImode, op1);
18993 /* FALLTHRU */
18994
18995 case MODE_VECTOR_FLOAT:
18996 ix86_avx256_split_vector_move_misalign (op0, op1);
18997 if (orig_op0)
18998 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18999 break;
19000
19001 default:
19002 gcc_unreachable ();
19003 }
19004
19005 return;
19006 }
19007
19008 if (MEM_P (op1))
19009 {
19010 /* Normal *mov<mode>_internal pattern will handle
19011 unaligned loads just fine if misaligned_operand
19012 is true, and without the UNSPEC it can be combined
19013 with arithmetic instructions. */
19014 if (TARGET_AVX
19015 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
19016 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
19017 && misaligned_operand (op1, GET_MODE (op1)))
19018 emit_insn (gen_rtx_SET (op0, op1));
19019 /* ??? If we have typed data, then it would appear that using
19020 movdqu is the only way to get unaligned data loaded with
19021 integer type. */
19022 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19023 {
19024 if (GET_MODE (op0) != V16QImode)
19025 {
19026 orig_op0 = op0;
19027 op0 = gen_reg_rtx (V16QImode);
19028 }
19029 op1 = gen_lowpart (V16QImode, op1);
19030 /* We will eventually emit movups based on insn attributes. */
19031 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
19032 if (orig_op0)
19033 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19034 }
19035 else if (TARGET_SSE2 && mode == V2DFmode)
19036 {
19037 rtx zero;
19038
19039 if (TARGET_AVX
19040 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19041 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19042 || optimize_insn_for_size_p ())
19043 {
19044 /* We will eventually emit movups based on insn attributes. */
19045 emit_insn (gen_sse2_loadupd (op0, op1));
19046 return;
19047 }
19048
19049 /* When SSE registers are split into halves, we can avoid
19050 writing to the top half twice. */
19051 if (TARGET_SSE_SPLIT_REGS)
19052 {
19053 emit_clobber (op0);
19054 zero = op0;
19055 }
19056 else
19057 {
19058 /* ??? Not sure about the best option for the Intel chips.
19059 The following would seem to satisfy; the register is
19060 entirely cleared, breaking the dependency chain. We
19061 then store to the upper half, with a dependency depth
19062 of one. A rumor has it that Intel recommends two movsd
19063 followed by an unpacklpd, but this is unconfirmed. And
19064 given that the dependency depth of the unpacklpd would
19065 still be one, I'm not sure why this would be better. */
19066 zero = CONST0_RTX (V2DFmode);
19067 }
19068
19069 m = adjust_address (op1, DFmode, 0);
19070 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19071 m = adjust_address (op1, DFmode, 8);
19072 emit_insn (gen_sse2_loadhpd (op0, op0, m));
19073 }
19074 else
19075 {
19076 rtx t;
19077
19078 if (TARGET_AVX
19079 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19080 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19081 || optimize_insn_for_size_p ())
19082 {
19083 if (GET_MODE (op0) != V4SFmode)
19084 {
19085 orig_op0 = op0;
19086 op0 = gen_reg_rtx (V4SFmode);
19087 }
19088 op1 = gen_lowpart (V4SFmode, op1);
19089 emit_insn (gen_sse_loadups (op0, op1));
19090 if (orig_op0)
19091 emit_move_insn (orig_op0,
19092 gen_lowpart (GET_MODE (orig_op0), op0));
19093 return;
19094 }
19095
19096 if (mode != V4SFmode)
19097 t = gen_reg_rtx (V4SFmode);
19098 else
19099 t = op0;
19100
19101 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19102 emit_move_insn (t, CONST0_RTX (V4SFmode));
19103 else
19104 emit_clobber (t);
19105
19106 m = adjust_address (op1, V2SFmode, 0);
19107 emit_insn (gen_sse_loadlps (t, t, m));
19108 m = adjust_address (op1, V2SFmode, 8);
19109 emit_insn (gen_sse_loadhps (t, t, m));
19110 if (mode != V4SFmode)
19111 emit_move_insn (op0, gen_lowpart (mode, t));
19112 }
19113 }
19114 else if (MEM_P (op0))
19115 {
19116 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19117 {
19118 op0 = gen_lowpart (V16QImode, op0);
19119 op1 = gen_lowpart (V16QImode, op1);
19120 /* We will eventually emit movups based on insn attributes. */
19121 emit_insn (gen_sse2_storedquv16qi (op0, op1));
19122 }
19123 else if (TARGET_SSE2 && mode == V2DFmode)
19124 {
19125 if (TARGET_AVX
19126 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19127 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19128 || optimize_insn_for_size_p ())
19129 /* We will eventually emit movups based on insn attributes. */
19130 emit_insn (gen_sse2_storeupd (op0, op1));
19131 else
19132 {
19133 m = adjust_address (op0, DFmode, 0);
19134 emit_insn (gen_sse2_storelpd (m, op1));
19135 m = adjust_address (op0, DFmode, 8);
19136 emit_insn (gen_sse2_storehpd (m, op1));
19137 }
19138 }
19139 else
19140 {
19141 if (mode != V4SFmode)
19142 op1 = gen_lowpart (V4SFmode, op1);
19143
19144 if (TARGET_AVX
19145 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19146 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19147 || optimize_insn_for_size_p ())
19148 {
19149 op0 = gen_lowpart (V4SFmode, op0);
19150 emit_insn (gen_sse_storeups (op0, op1));
19151 }
19152 else
19153 {
19154 m = adjust_address (op0, V2SFmode, 0);
19155 emit_insn (gen_sse_storelps (m, op1));
19156 m = adjust_address (op0, V2SFmode, 8);
19157 emit_insn (gen_sse_storehps (m, op1));
19158 }
19159 }
19160 }
19161 else
19162 gcc_unreachable ();
19163 }
19164
19165 /* Helper function of ix86_fixup_binary_operands to canonicalize
19166 operand order. Returns true if the operands should be swapped. */
19167
19168 static bool
19169 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19170 rtx operands[])
19171 {
19172 rtx dst = operands[0];
19173 rtx src1 = operands[1];
19174 rtx src2 = operands[2];
19175
19176 /* If the operation is not commutative, we can't do anything. */
19177 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19178 return false;
19179
19180 /* Highest priority is that src1 should match dst. */
19181 if (rtx_equal_p (dst, src1))
19182 return false;
19183 if (rtx_equal_p (dst, src2))
19184 return true;
19185
19186 /* Next highest priority is that immediate constants come second. */
19187 if (immediate_operand (src2, mode))
19188 return false;
19189 if (immediate_operand (src1, mode))
19190 return true;
19191
19192 /* Lowest priority is that memory references should come second. */
19193 if (MEM_P (src2))
19194 return false;
19195 if (MEM_P (src1))
19196 return true;
19197
19198 return false;
19199 }
19200
19201
19202 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19203 destination to use for the operation. If different from the true
19204 destination in operands[0], a copy operation will be required. */
19205
19206 rtx
19207 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19208 rtx operands[])
19209 {
19210 rtx dst = operands[0];
19211 rtx src1 = operands[1];
19212 rtx src2 = operands[2];
19213
19214 /* Canonicalize operand order. */
19215 if (ix86_swap_binary_operands_p (code, mode, operands))
19216 {
19217 /* It is invalid to swap operands of different modes. */
19218 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19219
19220 std::swap (src1, src2);
19221 }
19222
19223 /* The two source operands cannot both be in memory. */
19224 if (MEM_P (src1) && MEM_P (src2))
19225 {
19226 /* Optimization: Only read from memory once. */
19227 if (rtx_equal_p (src1, src2))
19228 {
19229 src2 = force_reg (mode, src2);
19230 src1 = src2;
19231 }
19232 else if (rtx_equal_p (dst, src1))
19233 src2 = force_reg (mode, src2);
19234 else
19235 src1 = force_reg (mode, src1);
19236 }
19237
19238 /* If the destination is memory, and we do not have matching source
19239 operands, do things in registers. */
19240 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19241 dst = gen_reg_rtx (mode);
19242
19243 /* Source 1 cannot be a constant. */
19244 if (CONSTANT_P (src1))
19245 src1 = force_reg (mode, src1);
19246
19247 /* Source 1 cannot be a non-matching memory. */
19248 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19249 src1 = force_reg (mode, src1);
19250
19251 /* Improve address combine. */
19252 if (code == PLUS
19253 && GET_MODE_CLASS (mode) == MODE_INT
19254 && MEM_P (src2))
19255 src2 = force_reg (mode, src2);
19256
19257 operands[1] = src1;
19258 operands[2] = src2;
19259 return dst;
19260 }
19261
19262 /* Similarly, but assume that the destination has already been
19263 set up properly. */
19264
19265 void
19266 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19267 machine_mode mode, rtx operands[])
19268 {
19269 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19270 gcc_assert (dst == operands[0]);
19271 }
19272
19273 /* Attempt to expand a binary operator. Make the expansion closer to the
19274 actual machine than just general_operand, which would allow 3 separate
19275 memory references (one output, two inputs) in a single insn. */
19276
19277 void
19278 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19279 rtx operands[])
19280 {
19281 rtx src1, src2, dst, op, clob;
19282
19283 dst = ix86_fixup_binary_operands (code, mode, operands);
19284 src1 = operands[1];
19285 src2 = operands[2];
19286
19287 /* Emit the instruction. */
19288
19289 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19290
19291 if (reload_completed
19292 && code == PLUS
19293 && !rtx_equal_p (dst, src1))
19294 {
19295 /* This is going to be an LEA; avoid splitting it later. */
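/* (Being a plain lea, it does not clobber the flags, so no
FLAGS_REG clobber is added here.) */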
19296 emit_insn (op);
19297 }
19298 else
19299 {
19300 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19301 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19302 }
19303
19304 /* Fix up the destination if needed. */
19305 if (dst != operands[0])
19306 emit_move_insn (operands[0], dst);
19307 }
19308
19309 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19310 the given OPERANDS. */
19311
19312 void
19313 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19314 rtx operands[])
19315 {
19316 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19317 if (SUBREG_P (operands[1]))
19318 {
19319 op1 = operands[1];
19320 op2 = operands[2];
19321 }
19322 else if (SUBREG_P (operands[2]))
19323 {
19324 op1 = operands[2];
19325 op2 = operands[1];
19326 }
19327 /* Optimize (__m128i) d | (__m128i) e and similar code,
19328 when d and e are float vectors, into a float vector logical
19329 insn. In C/C++ without using intrinsics there is no other way
19330 to express a vector logical operation on float vectors than
19331 to cast them temporarily to integer vectors. */
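/* (E.g. the integer-mode IOR above is then emitted as orps/orpd rather
than por, keeping the values in the floating point domain.) */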
19332 if (op1
19333 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19334 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19335 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19336 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19337 && SUBREG_BYTE (op1) == 0
19338 && (GET_CODE (op2) == CONST_VECTOR
19339 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19340 && SUBREG_BYTE (op2) == 0))
19341 && can_create_pseudo_p ())
19342 {
19343 rtx dst;
19344 switch (GET_MODE (SUBREG_REG (op1)))
19345 {
19346 case V4SFmode:
19347 case V8SFmode:
19348 case V16SFmode:
19349 case V2DFmode:
19350 case V4DFmode:
19351 case V8DFmode:
19352 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19353 if (GET_CODE (op2) == CONST_VECTOR)
19354 {
19355 op2 = gen_lowpart (GET_MODE (dst), op2);
19356 op2 = force_reg (GET_MODE (dst), op2);
19357 }
19358 else
19359 {
19360 op1 = operands[1];
19361 op2 = SUBREG_REG (operands[2]);
19362 if (!nonimmediate_operand (op2, GET_MODE (dst)))
19363 op2 = force_reg (GET_MODE (dst), op2);
19364 }
19365 op1 = SUBREG_REG (op1);
19366 if (!nonimmediate_operand (op1, GET_MODE (dst)))
19367 op1 = force_reg (GET_MODE (dst), op1);
19368 emit_insn (gen_rtx_SET (dst,
19369 gen_rtx_fmt_ee (code, GET_MODE (dst),
19370 op1, op2)));
19371 emit_move_insn (operands[0], gen_lowpart (mode, dst));
19372 return;
19373 default:
19374 break;
19375 }
19376 }
19377 if (!nonimmediate_operand (operands[1], mode))
19378 operands[1] = force_reg (mode, operands[1]);
19379 if (!nonimmediate_operand (operands[2], mode))
19380 operands[2] = force_reg (mode, operands[2]);
19381 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19382 emit_insn (gen_rtx_SET (operands[0],
19383 gen_rtx_fmt_ee (code, mode, operands[1],
19384 operands[2])));
19385 }
19386
19387 /* Return TRUE or FALSE depending on whether the binary operator meets the
19388 appropriate constraints. */
19389
19390 bool
19391 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19392 rtx operands[3])
19393 {
19394 rtx dst = operands[0];
19395 rtx src1 = operands[1];
19396 rtx src2 = operands[2];
19397
19398 /* The two source operands cannot both be in memory. */
19399 if (MEM_P (src1) && MEM_P (src2))
19400 return false;
19401
19402 /* Canonicalize operand order for commutative operators. */
19403 if (ix86_swap_binary_operands_p (code, mode, operands))
19404 std::swap (src1, src2);
19405
19406 /* If the destination is memory, we must have a matching source operand. */
19407 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19408 return false;
19409
19410 /* Source 1 cannot be a constant. */
19411 if (CONSTANT_P (src1))
19412 return false;
19413
19414 /* Source 1 cannot be a non-matching memory. */
19415 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19416 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19417 return (code == AND
19418 && (mode == HImode
19419 || mode == SImode
19420 || (TARGET_64BIT && mode == DImode))
19421 && satisfies_constraint_L (src2));
19422
19423 return true;
19424 }
19425
19426 /* Attempt to expand a unary operator. Make the expansion closer to the
19427 actual machine than just general_operand, which would allow 2 separate
19428 memory references (one output, one input) in a single insn. */
19429
19430 void
19431 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19432 rtx operands[])
19433 {
19434 bool matching_memory = false;
19435 rtx src, dst, op, clob;
19436
19437 dst = operands[0];
19438 src = operands[1];
19439
19440 /* If the destination is memory, and we do not have matching source
19441 operands, do things in registers. */
19442 if (MEM_P (dst))
19443 {
19444 if (rtx_equal_p (dst, src))
19445 matching_memory = true;
19446 else
19447 dst = gen_reg_rtx (mode);
19448 }
19449
19450 /* When source operand is memory, destination must match. */
19451 if (MEM_P (src) && !matching_memory)
19452 src = force_reg (mode, src);
19453
19454 /* Emit the instruction. */
19455
19456 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
19457
19458 if (code == NOT)
19459 emit_insn (op);
19460 else
19461 {
19462 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19463 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19464 }
19465
19466 /* Fix up the destination if needed. */
19467 if (dst != operands[0])
19468 emit_move_insn (operands[0], dst);
19469 }
19470
19471 /* Split a 32-bit/64-bit divmod with an 8-bit unsigned divmod if the dividend
19472 and divisor are within the range [0-255]. */
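/* Roughly, the expansion below produces the following control flow
   (an illustrative sketch only; the real RTL comes from the gen_*
   calls in the function):

       scratch = operands[2] | operands[3]
       test scratch, -0x100       ; ZF set iff both values fit in 8 bits
       je .Lqimode
       <full-width signed/unsigned divmod>
       jmp .Lend
     .Lqimode:
       <HImode by QImode unsigned divmod; quotient in AL, remainder in AH>
     .Lend:  */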
19473
19474 void
19475 ix86_split_idivmod (machine_mode mode, rtx operands[],
19476 bool signed_p)
19477 {
19478 rtx_code_label *end_label, *qimode_label;
19479 rtx insn, div, mod;
19480 rtx scratch, tmp0, tmp1, tmp2;
19481 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
19482 rtx (*gen_zero_extend) (rtx, rtx);
19483 rtx (*gen_test_ccno_1) (rtx, rtx);
19484
19485 switch (mode)
19486 {
19487 case SImode:
19488 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
19489 gen_test_ccno_1 = gen_testsi_ccno_1;
19490 gen_zero_extend = gen_zero_extendqisi2;
19491 break;
19492 case DImode:
19493 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
19494 gen_test_ccno_1 = gen_testdi_ccno_1;
19495 gen_zero_extend = gen_zero_extendqidi2;
19496 break;
19497 default:
19498 gcc_unreachable ();
19499 }
19500
19501 end_label = gen_label_rtx ();
19502 qimode_label = gen_label_rtx ();
19503
19504 scratch = gen_reg_rtx (mode);
19505
19506 /* Use 8-bit unsigned divmod if the dividend and divisor are within
19507 the range [0-255]. */
19508 emit_move_insn (scratch, operands[2]);
19509 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
19510 scratch, 1, OPTAB_DIRECT);
19511 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
19512 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
19513 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
19514 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
19515 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
19516 pc_rtx);
19517 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
19518 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19519 JUMP_LABEL (insn) = qimode_label;
19520
19521 /* Generate the original signed/unsigned divmod. */
19522 div = gen_divmod4_1 (operands[0], operands[1],
19523 operands[2], operands[3]);
19524 emit_insn (div);
19525
19526 /* Branch to the end. */
19527 emit_jump_insn (gen_jump (end_label));
19528 emit_barrier ();
19529
19530 /* Generate 8bit unsigned divide. */
19531 emit_label (qimode_label);
19532 /* Don't use operands[0] for result of 8bit divide since not all
19533 registers support QImode ZERO_EXTRACT. */
19534 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
19535 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
19536 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
19537 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
19538
19539 if (signed_p)
19540 {
19541 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
19542 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
19543 }
19544 else
19545 {
19546 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
19547 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
19548 }
19549
19550 /* Extract remainder from AH. */
19551 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
19552 if (REG_P (operands[1]))
19553 insn = emit_move_insn (operands[1], tmp1);
19554 else
19555 {
19556 /* Need a new scratch register since the old one has result
19557 of 8bit divide. */
19558 scratch = gen_reg_rtx (mode);
19559 emit_move_insn (scratch, tmp1);
19560 insn = emit_move_insn (operands[1], scratch);
19561 }
19562 set_unique_reg_note (insn, REG_EQUAL, mod);
19563
19564 /* Zero extend quotient from AL. */
19565 tmp1 = gen_lowpart (QImode, tmp0);
19566 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
19567 set_unique_reg_note (insn, REG_EQUAL, div);
19568
19569 emit_label (end_label);
19570 }
19571
19572 #define LEA_MAX_STALL (3)
19573 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
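/* A note on the bookkeeping used below: distances are accumulated in
   half-cycles.  increase_distance adds one half-cycle for independent
   adjacent insns, and when there is a dependency it rounds the distance
   up to a full cycle and adds one more cycle (distance + (distance & 1)
   + 2); the callers shift the result right by one to report whole
   cycles, so LEA_SEARCH_THRESHOLD bounds the scan to roughly
   LEA_MAX_STALL cycles worth of insns.  */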
19574
19575 /* Increase given DISTANCE in half-cycles according to
19576 dependencies between PREV and NEXT instructions.
19577 Add 1 half-cycle if there is no dependency and
19578 go to the next cycle if there is some dependency. */
19579
19580 static unsigned int
19581 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
19582 {
19583 df_ref def, use;
19584
19585 if (!prev || !next)
19586 return distance + (distance & 1) + 2;
19587
19588 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
19589 return distance + 1;
19590
19591 FOR_EACH_INSN_USE (use, next)
19592 FOR_EACH_INSN_DEF (def, prev)
19593 if (!DF_REF_IS_ARTIFICIAL (def)
19594 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
19595 return distance + (distance & 1) + 2;
19596
19597 return distance + 1;
19598 }
19599
19600 /* Function checks if instruction INSN defines register number
19601 REGNO1 or REGNO2. */
19602
19603 static bool
19604 insn_defines_reg (unsigned int regno1, unsigned int regno2,
19605 rtx_insn *insn)
19606 {
19607 df_ref def;
19608
19609 FOR_EACH_INSN_DEF (def, insn)
19610 if (DF_REF_REG_DEF_P (def)
19611 && !DF_REF_IS_ARTIFICIAL (def)
19612 && (regno1 == DF_REF_REGNO (def)
19613 || regno2 == DF_REF_REGNO (def)))
19614 return true;
19615
19616 return false;
19617 }
19618
19619 /* Function checks if instruction INSN uses register number
19620 REGNO as a part of address expression. */
19621
19622 static bool
19623 insn_uses_reg_mem (unsigned int regno, rtx insn)
19624 {
19625 df_ref use;
19626
19627 FOR_EACH_INSN_USE (use, insn)
19628 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
19629 return true;
19630
19631 return false;
19632 }
19633
19634 /* Search backward for non-agu definition of register number REGNO1
19635 or register number REGNO2 in basic block starting from instruction
19636 START up to head of basic block or instruction INSN.
19637
19638 Function puts true value into *FOUND var if definition was found
19639 and false otherwise.
19640
19641 Distance in half-cycles between START and found instruction or head
19642 of BB is added to DISTANCE and returned. */
19643
19644 static int
19645 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
19646 rtx_insn *insn, int distance,
19647 rtx_insn *start, bool *found)
19648 {
19649 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
19650 rtx_insn *prev = start;
19651 rtx_insn *next = NULL;
19652
19653 *found = false;
19654
19655 while (prev
19656 && prev != insn
19657 && distance < LEA_SEARCH_THRESHOLD)
19658 {
19659 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
19660 {
19661 distance = increase_distance (prev, next, distance);
19662 if (insn_defines_reg (regno1, regno2, prev))
19663 {
19664 if (recog_memoized (prev) < 0
19665 || get_attr_type (prev) != TYPE_LEA)
19666 {
19667 *found = true;
19668 return distance;
19669 }
19670 }
19671
19672 next = prev;
19673 }
19674 if (prev == BB_HEAD (bb))
19675 break;
19676
19677 prev = PREV_INSN (prev);
19678 }
19679
19680 return distance;
19681 }
19682
19683 /* Search backward for non-agu definition of register number REGNO1
19684 or register number REGNO2 in INSN's basic block until
19685 1. Pass LEA_SEARCH_THRESHOLD instructions, or
19686 2. Reach neighbour BBs boundary, or
19687 3. Reach agu definition.
19688 Returns the distance between the non-agu definition point and INSN.
19689 If no definition point, returns -1. */
19690
19691 static int
19692 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
19693 rtx_insn *insn)
19694 {
19695 basic_block bb = BLOCK_FOR_INSN (insn);
19696 int distance = 0;
19697 bool found = false;
19698
19699 if (insn != BB_HEAD (bb))
19700 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
19701 distance, PREV_INSN (insn),
19702 &found);
19703
19704 if (!found && distance < LEA_SEARCH_THRESHOLD)
19705 {
19706 edge e;
19707 edge_iterator ei;
19708 bool simple_loop = false;
19709
19710 FOR_EACH_EDGE (e, ei, bb->preds)
19711 if (e->src == bb)
19712 {
19713 simple_loop = true;
19714 break;
19715 }
19716
19717 if (simple_loop)
19718 distance = distance_non_agu_define_in_bb (regno1, regno2,
19719 insn, distance,
19720 BB_END (bb), &found);
19721 else
19722 {
19723 int shortest_dist = -1;
19724 bool found_in_bb = false;
19725
19726 FOR_EACH_EDGE (e, ei, bb->preds)
19727 {
19728 int bb_dist
19729 = distance_non_agu_define_in_bb (regno1, regno2,
19730 insn, distance,
19731 BB_END (e->src),
19732 &found_in_bb);
19733 if (found_in_bb)
19734 {
19735 if (shortest_dist < 0)
19736 shortest_dist = bb_dist;
19737 else if (bb_dist > 0)
19738 shortest_dist = MIN (bb_dist, shortest_dist);
19739
19740 found = true;
19741 }
19742 }
19743
19744 distance = shortest_dist;
19745 }
19746 }
19747
19748 /* get_attr_type may modify recog data. We want to make sure
19749 that recog data is valid for instruction INSN, on which
19750 distance_non_agu_define is called. INSN is unchanged here. */
19751 extract_insn_cached (insn);
19752
19753 if (!found)
19754 return -1;
19755
19756 return distance >> 1;
19757 }
19758
19759 /* Return the distance in half-cycles between INSN and the next
19760 insn that uses register number REGNO in a memory address, added
19761 to DISTANCE. Return -1 if REGNO is set.
19762
19763 Put true value into *FOUND if register usage was found and
19764 false otherwise.
19765 Put true value into *REDEFINED if register redefinition was
19766 found and false otherwise. */
19767
19768 static int
19769 distance_agu_use_in_bb (unsigned int regno,
19770 rtx_insn *insn, int distance, rtx_insn *start,
19771 bool *found, bool *redefined)
19772 {
19773 basic_block bb = NULL;
19774 rtx_insn *next = start;
19775 rtx_insn *prev = NULL;
19776
19777 *found = false;
19778 *redefined = false;
19779
19780 if (start != NULL_RTX)
19781 {
19782 bb = BLOCK_FOR_INSN (start);
19783 if (start != BB_HEAD (bb))
19784 /* If insn and start belong to the same bb, set prev to insn,
19785 so the call to increase_distance will increase the distance
19786 between insns by 1. */
19787 prev = insn;
19788 }
19789
19790 while (next
19791 && next != insn
19792 && distance < LEA_SEARCH_THRESHOLD)
19793 {
19794 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
19795 {
19796 distance = increase_distance(prev, next, distance);
19797 if (insn_uses_reg_mem (regno, next))
19798 {
19799 /* Return DISTANCE if OP0 is used in memory
19800 address in NEXT. */
19801 *found = true;
19802 return distance;
19803 }
19804
19805 if (insn_defines_reg (regno, INVALID_REGNUM, next))
19806 {
19807 /* Return -1 if OP0 is set in NEXT. */
19808 *redefined = true;
19809 return -1;
19810 }
19811
19812 prev = next;
19813 }
19814
19815 if (next == BB_END (bb))
19816 break;
19817
19818 next = NEXT_INSN (next);
19819 }
19820
19821 return distance;
19822 }
19823
19824 /* Return the distance between INSN and the next insn that uses
19825 register number REGNO0 in a memory address. Return -1 if no such
19826 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
19827
19828 static int
19829 distance_agu_use (unsigned int regno0, rtx_insn *insn)
19830 {
19831 basic_block bb = BLOCK_FOR_INSN (insn);
19832 int distance = 0;
19833 bool found = false;
19834 bool redefined = false;
19835
19836 if (insn != BB_END (bb))
19837 distance = distance_agu_use_in_bb (regno0, insn, distance,
19838 NEXT_INSN (insn),
19839 &found, &redefined);
19840
19841 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
19842 {
19843 edge e;
19844 edge_iterator ei;
19845 bool simple_loop = false;
19846
19847 FOR_EACH_EDGE (e, ei, bb->succs)
19848 if (e->dest == bb)
19849 {
19850 simple_loop = true;
19851 break;
19852 }
19853
19854 if (simple_loop)
19855 distance = distance_agu_use_in_bb (regno0, insn,
19856 distance, BB_HEAD (bb),
19857 &found, &redefined);
19858 else
19859 {
19860 int shortest_dist = -1;
19861 bool found_in_bb = false;
19862 bool redefined_in_bb = false;
19863
19864 FOR_EACH_EDGE (e, ei, bb->succs)
19865 {
19866 int bb_dist
19867 = distance_agu_use_in_bb (regno0, insn,
19868 distance, BB_HEAD (e->dest),
19869 &found_in_bb, &redefined_in_bb);
19870 if (found_in_bb)
19871 {
19872 if (shortest_dist < 0)
19873 shortest_dist = bb_dist;
19874 else if (bb_dist > 0)
19875 shortest_dist = MIN (bb_dist, shortest_dist);
19876
19877 found = true;
19878 }
19879 }
19880
19881 distance = shortest_dist;
19882 }
19883 }
19884
19885 if (!found || redefined)
19886 return -1;
19887
19888 return distance >> 1;
19889 }
19890
19891 /* Define this macro to tune LEA priority vs ADD; it takes effect when
19892 there is a dilemma of choosing LEA or ADD.
19893 Negative value: ADD is preferred over LEA
19894 Zero: Neutral
19895 Positive value: LEA is preferred over ADD */
19896 #define IX86_LEA_PRIORITY 0
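/* For example, building with IX86_LEA_PRIORITY defined to 1 would bump
   the effective definition distance in ix86_lea_outperforms by one
   cycle, tipping close calls towards keeping the lea; the default of 0
   keeps the comparison neutral.  */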
19897
19898 /* Return true if using the lea INSN has a performance advantage
19899 over a sequence of instructions. The instruction sequence has
19900 SPLIT_COST cycles higher latency than the lea latency. */
19901
19902 static bool
19903 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
19904 unsigned int regno2, int split_cost, bool has_scale)
19905 {
19906 int dist_define, dist_use;
19907
19908 /* For Silvermont, if a 2-source or 3-source LEA is used for a
19909 non-destructive destination, or in order to gain the ability
19910 to use SCALE, the use of LEA is justified. */
19911 if (TARGET_SILVERMONT || TARGET_INTEL)
19912 {
19913 if (has_scale)
19914 return true;
19915 if (split_cost < 1)
19916 return false;
19917 if (regno0 == regno1 || regno0 == regno2)
19918 return false;
19919 return true;
19920 }
19921
19922 dist_define = distance_non_agu_define (regno1, regno2, insn);
19923 dist_use = distance_agu_use (regno0, insn);
19924
19925 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
19926 {
19927 /* If there is no non-AGU operand definition, no AGU
19928 operand usage and the split cost is 0, then both the lea
19929 and non-lea variants have the same priority. Currently
19930 we prefer lea for 64-bit code and non-lea for 32-bit
19931 code. */
19932 if (dist_use < 0 && split_cost == 0)
19933 return TARGET_64BIT || IX86_LEA_PRIORITY;
19934 else
19935 return true;
19936 }
19937
19938 /* With a longer definition distance, lea is preferable.
19939 Here we adjust it to take into account the splitting cost and
19940 the lea priority. */
19941 dist_define += split_cost + IX86_LEA_PRIORITY;
19942
19943 /* If there is no use in a memory address, then we just check
19944 that the split cost exceeds the AGU stall. */
19945 if (dist_use < 0)
19946 return dist_define > LEA_MAX_STALL;
19947
19948 /* If this insn has both a backward non-AGU dependence and a forward
19949 AGU dependence, the one with the shorter distance takes effect. */
19950 return dist_define >= dist_use;
19951 }
19952
19953 /* Return true if it is legal to clobber flags by INSN and
19954 false otherwise. */
19955
19956 static bool
19957 ix86_ok_to_clobber_flags (rtx_insn *insn)
19958 {
19959 basic_block bb = BLOCK_FOR_INSN (insn);
19960 df_ref use;
19961 bitmap live;
19962
19963 while (insn)
19964 {
19965 if (NONDEBUG_INSN_P (insn))
19966 {
19967 FOR_EACH_INSN_USE (use, insn)
19968 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
19969 return false;
19970
19971 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
19972 return true;
19973 }
19974
19975 if (insn == BB_END (bb))
19976 break;
19977
19978 insn = NEXT_INSN (insn);
19979 }
19980
19981 live = df_get_live_out (bb);
19982 return !REGNO_REG_SET_P (live, FLAGS_REG);
19983 }
19984
19985 /* Return true if we need to split op0 = op1 + op2 into a sequence of
19986 move and add to avoid AGU stalls. */
19987
19988 bool
19989 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
19990 {
19991 unsigned int regno0, regno1, regno2;
19992
19993 /* Check if we need to optimize. */
19994 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19995 return false;
19996
19997 /* Check it is correct to split here. */
19998 if (!ix86_ok_to_clobber_flags(insn))
19999 return false;
20000
20001 regno0 = true_regnum (operands[0]);
20002 regno1 = true_regnum (operands[1]);
20003 regno2 = true_regnum (operands[2]);
20004
20005 /* We need to split only adds with a non-destructive
20006 destination operand. */
20007 if (regno0 == regno1 || regno0 == regno2)
20008 return false;
20009 else
20010 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20011 }
20012
20013 /* Return true if we should emit an lea instruction instead of a mov
20014 instruction. */
20015
20016 bool
20017 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20018 {
20019 unsigned int regno0, regno1;
20020
20021 /* Check if we need to optimize. */
20022 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20023 return false;
20024
20025 /* Use lea for reg to reg moves only. */
20026 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20027 return false;
20028
20029 regno0 = true_regnum (operands[0]);
20030 regno1 = true_regnum (operands[1]);
20031
20032 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20033 }
20034
20035 /* Return true if we need to split lea into a sequence of
20036 instructions to avoid AGU stalls. */
20037
20038 bool
20039 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20040 {
20041 unsigned int regno0, regno1, regno2;
20042 int split_cost;
20043 struct ix86_address parts;
20044 int ok;
20045
20046 /* Check we need to optimize. */
20047 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20048 return false;
20049
20050 /* The "at least two components" test below might not catch simple
20051 move or zero extension insns if parts.base is non-NULL and parts.disp
20052 is const0_rtx as the only components in the address, e.g. if the
20053 register is %rbp or %r13. As this test is much cheaper and moves or
20054 zero extensions are the common case, do this check first. */
20055 if (REG_P (operands[1])
20056 || (SImode_address_operand (operands[1], VOIDmode)
20057 && REG_P (XEXP (operands[1], 0))))
20058 return false;
20059
20060 /* Check if it is OK to split here. */
20061 if (!ix86_ok_to_clobber_flags (insn))
20062 return false;
20063
20064 ok = ix86_decompose_address (operands[1], &parts);
20065 gcc_assert (ok);
20066
20067 /* There should be at least two components in the address. */
20068 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20069 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20070 return false;
20071
20072 /* We should not split into add if a non-legitimate PIC
20073 operand is used as the displacement. */
20074 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20075 return false;
20076
20077 regno0 = true_regnum (operands[0]);
20078 regno1 = INVALID_REGNUM;
20079 regno2 = INVALID_REGNUM;
20080
20081 if (parts.base)
20082 regno1 = true_regnum (parts.base);
20083 if (parts.index)
20084 regno2 = true_regnum (parts.index);
20085
20086 split_cost = 0;
20087
20088 /* Compute how many cycles we will add to the execution time
20089 if we split the lea into a sequence of instructions. */
20090 if (parts.base || parts.index)
20091 {
20092 /* Have to use a mov instruction if the non-destructive
20093 destination form is used. */
20094 if (regno1 != regno0 && regno2 != regno0)
20095 split_cost += 1;
20096
20097 /* Have to add index to base if both exist. */
20098 if (parts.base && parts.index)
20099 split_cost += 1;
20100
20101 /* Have to use shift and adds if scale is 2 or greater. */
20102 if (parts.scale > 1)
20103 {
20104 if (regno0 != regno1)
20105 split_cost += 1;
20106 else if (regno2 == regno0)
20107 split_cost += 4;
20108 else
20109 split_cost += parts.scale;
20110 }
20111
20112 /* Have to use an add instruction with an immediate if
20113 disp is nonzero. */
20114 if (parts.disp && parts.disp != const0_rtx)
20115 split_cost += 1;
20116
20117 /* Subtract the price of lea. */
20118 split_cost -= 1;
20119 }
20120
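  /* For example, for "lea 0x4(%rbx,%rcx,2), %rax" with all three
     registers distinct, the accounting above gives split_cost
     = 1 (mov) + 1 (add of index to base) + 1 (shift for the scale)
     + 1 (add of the displacement) - 1 (the lea itself) = 3.  */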
20121 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20122 parts.scale > 1);
20123 }
20124
20125 /* Emit the x86 binary operator CODE in mode MODE, where the first operand
20126 matches the destination. The RTX includes a clobber of FLAGS_REG. */
20127
20128 static void
20129 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20130 rtx dst, rtx src)
20131 {
20132 rtx op, clob;
20133
20134 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20135 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20136
20137 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20138 }
20139
20140 /* Return true if the definition of REGNO1 is nearest to INSN. */
20141
20142 static bool
20143 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20144 {
20145 rtx_insn *prev = insn;
20146 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20147
20148 if (insn == start)
20149 return false;
20150 while (prev && prev != start)
20151 {
20152 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20153 {
20154 prev = PREV_INSN (prev);
20155 continue;
20156 }
20157 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20158 return true;
20159 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20160 return false;
20161 prev = PREV_INSN (prev);
20162 }
20163
20164 /* None of the regs is defined in the bb. */
20165 return false;
20166 }
20167
20168 /* Split lea instructions into a sequence of instructions
20169 which are executed on the ALU to avoid AGU stalls.
20170 It is assumed that the flags register may be clobbered
20171 at the lea position. */
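/* For instance, a lea computing r0 = r1 + r2*4 + 8 with r0, r1 and r2
   all distinct is rewritten below roughly as

       r0 = r2
       r0 <<= 2
       r0 += r1
       r0 += 8

   while r0 = r0 + r2 (no scale, destination matching the base)
   degenerates into a single add.  */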
20172
20173 void
20174 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20175 {
20176 unsigned int regno0, regno1, regno2;
20177 struct ix86_address parts;
20178 rtx target, tmp;
20179 int ok, adds;
20180
20181 ok = ix86_decompose_address (operands[1], &parts);
20182 gcc_assert (ok);
20183
20184 target = gen_lowpart (mode, operands[0]);
20185
20186 regno0 = true_regnum (target);
20187 regno1 = INVALID_REGNUM;
20188 regno2 = INVALID_REGNUM;
20189
20190 if (parts.base)
20191 {
20192 parts.base = gen_lowpart (mode, parts.base);
20193 regno1 = true_regnum (parts.base);
20194 }
20195
20196 if (parts.index)
20197 {
20198 parts.index = gen_lowpart (mode, parts.index);
20199 regno2 = true_regnum (parts.index);
20200 }
20201
20202 if (parts.disp)
20203 parts.disp = gen_lowpart (mode, parts.disp);
20204
20205 if (parts.scale > 1)
20206 {
20207 /* Case r1 = r1 + ... */
20208 if (regno1 == regno0)
20209 {
20210 /* If we have the case r1 = r1 + C * r2 then we
20211 would have to use multiplication, which is very
20212 expensive. Assume the cost model is wrong if we
20213 get such a case here. */
20214 gcc_assert (regno2 != regno0);
20215
20216 for (adds = parts.scale; adds > 0; adds--)
20217 ix86_emit_binop (PLUS, mode, target, parts.index);
20218 }
20219 else
20220 {
20221 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20222 if (regno0 != regno2)
20223 emit_insn (gen_rtx_SET (target, parts.index));
20224
20225 /* Use shift for scaling. */
20226 ix86_emit_binop (ASHIFT, mode, target,
20227 GEN_INT (exact_log2 (parts.scale)));
20228
20229 if (parts.base)
20230 ix86_emit_binop (PLUS, mode, target, parts.base);
20231
20232 if (parts.disp && parts.disp != const0_rtx)
20233 ix86_emit_binop (PLUS, mode, target, parts.disp);
20234 }
20235 }
20236 else if (!parts.base && !parts.index)
20237 {
20238 gcc_assert (parts.disp);
20239 emit_insn (gen_rtx_SET (target, parts.disp));
20240 }
20241 else
20242 {
20243 if (!parts.base)
20244 {
20245 if (regno0 != regno2)
20246 emit_insn (gen_rtx_SET (target, parts.index));
20247 }
20248 else if (!parts.index)
20249 {
20250 if (regno0 != regno1)
20251 emit_insn (gen_rtx_SET (target, parts.base));
20252 }
20253 else
20254 {
20255 if (regno0 == regno1)
20256 tmp = parts.index;
20257 else if (regno0 == regno2)
20258 tmp = parts.base;
20259 else
20260 {
20261 rtx tmp1;
20262
20263 /* Find better operand for SET instruction, depending
20264 on which definition is farther from the insn. */
20265 if (find_nearest_reg_def (insn, regno1, regno2))
20266 tmp = parts.index, tmp1 = parts.base;
20267 else
20268 tmp = parts.base, tmp1 = parts.index;
20269
20270 emit_insn (gen_rtx_SET (target, tmp));
20271
20272 if (parts.disp && parts.disp != const0_rtx)
20273 ix86_emit_binop (PLUS, mode, target, parts.disp);
20274
20275 ix86_emit_binop (PLUS, mode, target, tmp1);
20276 return;
20277 }
20278
20279 ix86_emit_binop (PLUS, mode, target, tmp);
20280 }
20281
20282 if (parts.disp && parts.disp != const0_rtx)
20283 ix86_emit_binop (PLUS, mode, target, parts.disp);
20284 }
20285 }
20286
20287 /* Return true if it is OK to optimize an ADD operation into an LEA
20288 operation to avoid flag register consumption. For most processors,
20289 ADD is faster than LEA. For processors like BONNELL, if the
20290 destination register of the LEA holds an actual address which will be
20291 used soon, LEA is better; otherwise ADD is better. */
20292
20293 bool
20294 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20295 {
20296 unsigned int regno0 = true_regnum (operands[0]);
20297 unsigned int regno1 = true_regnum (operands[1]);
20298 unsigned int regno2 = true_regnum (operands[2]);
20299
20300 /* If a = b + c (a != b && a != c), we must use the lea form. */
20301 if (regno0 != regno1 && regno0 != regno2)
20302 return true;
20303
20304 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20305 return false;
20306
20307 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20308 }
20309
20310 /* Return true if destination reg of SET_BODY is shift count of
20311 USE_BODY. */
20312
20313 static bool
20314 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20315 {
20316 rtx set_dest;
20317 rtx shift_rtx;
20318 int i;
20319
20320 /* Retrieve destination of SET_BODY. */
20321 switch (GET_CODE (set_body))
20322 {
20323 case SET:
20324 set_dest = SET_DEST (set_body);
20325 if (!set_dest || !REG_P (set_dest))
20326 return false;
20327 break;
20328 case PARALLEL:
20329 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20330 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20331 use_body))
20332 return true;
20333 default:
20334 return false;
20335 break;
20336 }
20337
20338 /* Retrieve shift count of USE_BODY. */
20339 switch (GET_CODE (use_body))
20340 {
20341 case SET:
20342 shift_rtx = XEXP (use_body, 1);
20343 break;
20344 case PARALLEL:
20345 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20346 if (ix86_dep_by_shift_count_body (set_body,
20347 XVECEXP (use_body, 0, i)))
20348 return true;
20349 default:
20350 return false;
20351 break;
20352 }
20353
20354 if (shift_rtx
20355 && (GET_CODE (shift_rtx) == ASHIFT
20356 || GET_CODE (shift_rtx) == LSHIFTRT
20357 || GET_CODE (shift_rtx) == ASHIFTRT
20358 || GET_CODE (shift_rtx) == ROTATE
20359 || GET_CODE (shift_rtx) == ROTATERT))
20360 {
20361 rtx shift_count = XEXP (shift_rtx, 1);
20362
20363 /* Return true if shift count is dest of SET_BODY. */
20364 if (REG_P (shift_count))
20365 {
20366 /* Add this check since it can be invoked before register
20367 allocation in the pre-reload scheduler. */
20368 if (reload_completed
20369 && true_regnum (set_dest) == true_regnum (shift_count))
20370 return true;
20371 else if (REGNO(set_dest) == REGNO(shift_count))
20372 return true;
20373 }
20374 }
20375
20376 return false;
20377 }
20378
20379 /* Return true if destination reg of SET_INSN is shift count of
20380 USE_INSN. */
20381
20382 bool
20383 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20384 {
20385 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20386 PATTERN (use_insn));
20387 }
20388
20389 /* Return TRUE or FALSE depending on whether the unary operator meets the
20390 appropriate constraints. */
20391
20392 bool
20393 ix86_unary_operator_ok (enum rtx_code,
20394 machine_mode,
20395 rtx operands[2])
20396 {
20397 /* If one of operands is memory, source and destination must match. */
20398 if ((MEM_P (operands[0])
20399 || MEM_P (operands[1]))
20400 && ! rtx_equal_p (operands[0], operands[1]))
20401 return false;
20402 return true;
20403 }
20404
20405 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20406 are ok, keeping in mind the possible movddup alternative. */
20407
20408 bool
20409 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
20410 {
20411 if (MEM_P (operands[0]))
20412 return rtx_equal_p (operands[0], operands[1 + high]);
20413 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20414 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20415 return true;
20416 }
20417
20418 /* Post-reload splitter for converting an SF or DFmode value in an
20419 SSE register into an unsigned SImode. */
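/* For example, for an input value of 3000000000.0 (>= 0x1p31) the mask
   compare below yields all-ones, 0x1.0p31 is subtracted (giving
   852516352.0), the truncating convert produces 852516352, and the
   final XOR with 0x80000000 restores 3000000000.  Values below 0x1p31
   get a zero mask and convert directly.  */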
20420
20421 void
20422 ix86_split_convert_uns_si_sse (rtx operands[])
20423 {
20424 machine_mode vecmode;
20425 rtx value, large, zero_or_two31, input, two31, x;
20426
20427 large = operands[1];
20428 zero_or_two31 = operands[2];
20429 input = operands[3];
20430 two31 = operands[4];
20431 vecmode = GET_MODE (large);
20432 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
20433
20434 /* Load up the value into the low element. We must ensure that the other
20435 elements are valid floats -- zero is the easiest such value. */
20436 if (MEM_P (input))
20437 {
20438 if (vecmode == V4SFmode)
20439 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
20440 else
20441 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
20442 }
20443 else
20444 {
20445 input = gen_rtx_REG (vecmode, REGNO (input));
20446 emit_move_insn (value, CONST0_RTX (vecmode));
20447 if (vecmode == V4SFmode)
20448 emit_insn (gen_sse_movss (value, value, input));
20449 else
20450 emit_insn (gen_sse2_movsd (value, value, input));
20451 }
20452
20453 emit_move_insn (large, two31);
20454 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
20455
20456 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
20457 emit_insn (gen_rtx_SET (large, x));
20458
20459 x = gen_rtx_AND (vecmode, zero_or_two31, large);
20460 emit_insn (gen_rtx_SET (zero_or_two31, x));
20461
20462 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
20463 emit_insn (gen_rtx_SET (value, x));
20464
20465 large = gen_rtx_REG (V4SImode, REGNO (large));
20466 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
20467
20468 x = gen_rtx_REG (V4SImode, REGNO (value));
20469 if (vecmode == V4SFmode)
20470 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
20471 else
20472 emit_insn (gen_sse2_cvttpd2dq (x, value));
20473 value = x;
20474
20475 emit_insn (gen_xorv4si3 (value, value, large));
20476 }
20477
20478 /* Convert an unsigned DImode value into a DFmode, using only SSE.
20479 Expects the 64-bit DImode to be supplied in a pair of integral
20480 registers. Requires SSE2; will use SSE3 if available. For x86_32,
20481 -mfpmath=sse, !optimize_size only. */
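/* This is the usual double-double trick: each 32-bit half of the input
   is paired with the exponent words 0x43300000 / 0x45300000 to form
   0x1.0p52 + lo and 0x1.0p84 + hi * 2^32, the two biases are
   subtracted off again, and the partial values are summed.  E.g. for
   the input 0x0000000100000002 the partial values after the bias
   subtraction are 4294967296.0 and 2.0, which add up to the exact
   result 4294967298.0.  */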
20482
20483 void
20484 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
20485 {
20486 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
20487 rtx int_xmm, fp_xmm;
20488 rtx biases, exponents;
20489 rtx x;
20490
20491 int_xmm = gen_reg_rtx (V4SImode);
20492 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
20493 emit_insn (gen_movdi_to_sse (int_xmm, input));
20494 else if (TARGET_SSE_SPLIT_REGS)
20495 {
20496 emit_clobber (int_xmm);
20497 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
20498 }
20499 else
20500 {
20501 x = gen_reg_rtx (V2DImode);
20502 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
20503 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
20504 }
20505
20506 x = gen_rtx_CONST_VECTOR (V4SImode,
20507 gen_rtvec (4, GEN_INT (0x43300000UL),
20508 GEN_INT (0x45300000UL),
20509 const0_rtx, const0_rtx));
20510 exponents = validize_mem (force_const_mem (V4SImode, x));
20511
20512 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
20513 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
20514
20515 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
20516 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
20517 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
20518 (0x1.0p84 + double(fp_value_hi_xmm)).
20519 Note these exponents differ by 32. */
20520
20521 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
20522
20523 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
20524 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
20525 real_ldexp (&bias_lo_rvt, &dconst1, 52);
20526 real_ldexp (&bias_hi_rvt, &dconst1, 84);
20527 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
20528 x = const_double_from_real_value (bias_hi_rvt, DFmode);
20529 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
20530 biases = validize_mem (force_const_mem (V2DFmode, biases));
20531 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
20532
20533 /* Add the upper and lower DFmode values together. */
20534 if (TARGET_SSE3)
20535 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
20536 else
20537 {
20538 x = copy_to_mode_reg (V2DFmode, fp_xmm);
20539 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
20540 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
20541 }
20542
20543 ix86_expand_vector_extract (false, target, fp_xmm, 0);
20544 }
20545
20546 /* Not used, but eases macroization of patterns. */
20547 void
20548 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
20549 {
20550 gcc_unreachable ();
20551 }
20552
20553 /* Convert an unsigned SImode value into a DFmode. Only currently used
20554 for SSE, but applicable anywhere. */
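/* The trick: adding -2^31 wraps the value into the signed SImode
   range, the signed int->double conversion is then exact, and adding
   0x1.0p31 back as a double restores the unsigned value.  E.g. the
   input 0xffffffff becomes 0x7fffffff, converts to 2147483647.0, and
   the final add yields 4294967295.0.  */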
20555
20556 void
20557 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
20558 {
20559 REAL_VALUE_TYPE TWO31r;
20560 rtx x, fp;
20561
20562 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
20563 NULL, 1, OPTAB_DIRECT);
20564
20565 fp = gen_reg_rtx (DFmode);
20566 emit_insn (gen_floatsidf2 (fp, x));
20567
20568 real_ldexp (&TWO31r, &dconst1, 31);
20569 x = const_double_from_real_value (TWO31r, DFmode);
20570
20571 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
20572 if (x != target)
20573 emit_move_insn (target, x);
20574 }
20575
20576 /* Convert a signed DImode value into a DFmode. Only used for SSE in
20577 32-bit mode; otherwise we have a direct convert instruction. */
20578
20579 void
20580 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
20581 {
20582 REAL_VALUE_TYPE TWO32r;
20583 rtx fp_lo, fp_hi, x;
20584
20585 fp_lo = gen_reg_rtx (DFmode);
20586 fp_hi = gen_reg_rtx (DFmode);
20587
20588 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
20589
20590 real_ldexp (&TWO32r, &dconst1, 32);
20591 x = const_double_from_real_value (TWO32r, DFmode);
20592 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
20593
20594 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
20595
20596 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
20597 0, OPTAB_DIRECT);
20598 if (x != target)
20599 emit_move_insn (target, x);
20600 }
20601
20602 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
20603 For x86_32, -mfpmath=sse, !optimize_size only. */
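/* The input is split as input = hi * 2^16 + lo.  Both 16-bit halves
   convert to SFmode exactly, the multiply by 0x1.0p16 only adjusts an
   exponent, and the final add is the single rounding step, e.g.
   0x12345678 -> hi = 0x1234, lo = 0x5678, result = (float) 0x12345678
   correctly rounded.  */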
20604 void
20605 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
20606 {
20607 REAL_VALUE_TYPE ONE16r;
20608 rtx fp_hi, fp_lo, int_hi, int_lo, x;
20609
20610 real_ldexp (&ONE16r, &dconst1, 16);
20611 x = const_double_from_real_value (ONE16r, SFmode);
20612 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
20613 NULL, 0, OPTAB_DIRECT);
20614 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
20615 NULL, 0, OPTAB_DIRECT);
20616 fp_hi = gen_reg_rtx (SFmode);
20617 fp_lo = gen_reg_rtx (SFmode);
20618 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
20619 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
20620 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
20621 0, OPTAB_DIRECT);
20622 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
20623 0, OPTAB_DIRECT);
20624 if (!rtx_equal_p (target, fp_hi))
20625 emit_move_insn (target, fp_hi);
20626 }
20627
20628 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
20629 a vector of unsigned ints VAL to vector of floats TARGET. */
20630
20631 void
20632 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
20633 {
20634 rtx tmp[8];
20635 REAL_VALUE_TYPE TWO16r;
20636 machine_mode intmode = GET_MODE (val);
20637 machine_mode fltmode = GET_MODE (target);
20638 rtx (*cvt) (rtx, rtx);
20639
20640 if (intmode == V4SImode)
20641 cvt = gen_floatv4siv4sf2;
20642 else
20643 cvt = gen_floatv8siv8sf2;
20644 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
20645 tmp[0] = force_reg (intmode, tmp[0]);
20646 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
20647 OPTAB_DIRECT);
20648 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
20649 NULL_RTX, 1, OPTAB_DIRECT);
20650 tmp[3] = gen_reg_rtx (fltmode);
20651 emit_insn (cvt (tmp[3], tmp[1]));
20652 tmp[4] = gen_reg_rtx (fltmode);
20653 emit_insn (cvt (tmp[4], tmp[2]));
20654 real_ldexp (&TWO16r, &dconst1, 16);
20655 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
20656 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
20657 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
20658 OPTAB_DIRECT);
20659 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
20660 OPTAB_DIRECT);
20661 if (tmp[7] != target)
20662 emit_move_insn (target, tmp[7]);
20663 }
20664
20665 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
20666 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
20667 This is done by doing just signed conversion if < 0x1p31, and otherwise by
20668 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
20669
20670 rtx
20671 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
20672 {
20673 REAL_VALUE_TYPE TWO31r;
20674 rtx two31r, tmp[4];
20675 machine_mode mode = GET_MODE (val);
20676 machine_mode scalarmode = GET_MODE_INNER (mode);
20677 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
20678 rtx (*cmp) (rtx, rtx, rtx, rtx);
20679 int i;
20680
20681 for (i = 0; i < 3; i++)
20682 tmp[i] = gen_reg_rtx (mode);
20683 real_ldexp (&TWO31r, &dconst1, 31);
20684 two31r = const_double_from_real_value (TWO31r, scalarmode);
20685 two31r = ix86_build_const_vector (mode, 1, two31r);
20686 two31r = force_reg (mode, two31r);
20687 switch (mode)
20688 {
20689 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
20690 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
20691 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
20692 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
20693 default: gcc_unreachable ();
20694 }
20695 tmp[3] = gen_rtx_LE (mode, two31r, val);
20696 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
20697 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
20698 0, OPTAB_DIRECT);
20699 if (intmode == V4SImode || TARGET_AVX2)
20700 *xorp = expand_simple_binop (intmode, ASHIFT,
20701 gen_lowpart (intmode, tmp[0]),
20702 GEN_INT (31), NULL_RTX, 0,
20703 OPTAB_DIRECT);
20704 else
20705 {
20706 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
20707 two31 = ix86_build_const_vector (intmode, 1, two31);
20708 *xorp = expand_simple_binop (intmode, AND,
20709 gen_lowpart (intmode, tmp[0]),
20710 two31, NULL_RTX, 0,
20711 OPTAB_DIRECT);
20712 }
20713 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
20714 0, OPTAB_DIRECT);
20715 }
20716
20717 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
20718 then replicate the value for all elements of the vector
20719 register. */
20720
20721 rtx
20722 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
20723 {
20724 int i, n_elt;
20725 rtvec v;
20726 machine_mode scalar_mode;
20727
20728 switch (mode)
20729 {
20730 case V64QImode:
20731 case V32QImode:
20732 case V16QImode:
20733 case V32HImode:
20734 case V16HImode:
20735 case V8HImode:
20736 case V16SImode:
20737 case V8SImode:
20738 case V4SImode:
20739 case V8DImode:
20740 case V4DImode:
20741 case V2DImode:
20742 gcc_assert (vect);
20743 case V16SFmode:
20744 case V8SFmode:
20745 case V4SFmode:
20746 case V8DFmode:
20747 case V4DFmode:
20748 case V2DFmode:
20749 n_elt = GET_MODE_NUNITS (mode);
20750 v = rtvec_alloc (n_elt);
20751 scalar_mode = GET_MODE_INNER (mode);
20752
20753 RTVEC_ELT (v, 0) = value;
20754
20755 for (i = 1; i < n_elt; ++i)
20756 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
20757
20758 return gen_rtx_CONST_VECTOR (mode, v);
20759
20760 default:
20761 gcc_unreachable ();
20762 }
20763 }
20764
20765 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
20766 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
20767 for an SSE register. If VECT is true, then replicate the mask for
20768 all elements of the vector register. If INVERT is true, then create
20769 a mask excluding the sign bit. */
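/* For V4SFmode with VECT set this produces a register holding the bit
   pattern { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }, or its
   complement { 0x7fffffff, ... } when INVERT is set.  With VECT clear
   only the low element carries the bit and the rest are zero, which is
   what the scalar SFmode/DFmode expansions use.  */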
20770
20771 rtx
20772 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
20773 {
20774 machine_mode vec_mode, imode;
20775 wide_int w;
20776 rtx mask, v;
20777
20778 switch (mode)
20779 {
20780 case V16SImode:
20781 case V16SFmode:
20782 case V8SImode:
20783 case V4SImode:
20784 case V8SFmode:
20785 case V4SFmode:
20786 vec_mode = mode;
20787 imode = SImode;
20788 break;
20789
20790 case V8DImode:
20791 case V4DImode:
20792 case V2DImode:
20793 case V8DFmode:
20794 case V4DFmode:
20795 case V2DFmode:
20796 vec_mode = mode;
20797 imode = DImode;
20798 break;
20799
20800 case TImode:
20801 case TFmode:
20802 vec_mode = VOIDmode;
20803 imode = TImode;
20804 break;
20805
20806 default:
20807 gcc_unreachable ();
20808 }
20809
20810 machine_mode inner_mode = GET_MODE_INNER (mode);
20811 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
20812 GET_MODE_BITSIZE (inner_mode));
20813 if (invert)
20814 w = wi::bit_not (w);
20815
20816 /* Force this value into the low part of a fp vector constant. */
20817 mask = immed_wide_int_const (w, imode);
20818 mask = gen_lowpart (inner_mode, mask);
20819
20820 if (vec_mode == VOIDmode)
20821 return force_reg (inner_mode, mask);
20822
20823 v = ix86_build_const_vector (vec_mode, vect, mask);
20824 return force_reg (vec_mode, v);
20825 }
20826
20827 /* Generate code for floating point ABS or NEG. */
20828
20829 void
20830 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
20831 rtx operands[])
20832 {
20833 rtx mask, set, dst, src;
20834 bool use_sse = false;
20835 bool vector_mode = VECTOR_MODE_P (mode);
20836 machine_mode vmode = mode;
20837
20838 if (vector_mode)
20839 use_sse = true;
20840 else if (mode == TFmode)
20841 use_sse = true;
20842 else if (TARGET_SSE_MATH)
20843 {
20844 use_sse = SSE_FLOAT_MODE_P (mode);
20845 if (mode == SFmode)
20846 vmode = V4SFmode;
20847 else if (mode == DFmode)
20848 vmode = V2DFmode;
20849 }
20850
20851 /* NEG and ABS performed with SSE use bitwise mask operations.
20852 Create the appropriate mask now. */
20853 if (use_sse)
20854 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
20855 else
20856 mask = NULL_RTX;
20857
20858 dst = operands[0];
20859 src = operands[1];
20860
20861 set = gen_rtx_fmt_e (code, mode, src);
20862 set = gen_rtx_SET (dst, set);
20863
20864 if (mask)
20865 {
20866 rtx use, clob;
20867 rtvec par;
20868
20869 use = gen_rtx_USE (VOIDmode, mask);
20870 if (vector_mode)
20871 par = gen_rtvec (2, set, use);
20872 else
20873 {
20874 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20875 par = gen_rtvec (3, set, use, clob);
20876 }
20877 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
20878 }
20879 else
20880 emit_insn (set);
20881 }
20882
20883 /* Expand a copysign operation. Special case operand 0 being a constant. */
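/* The expansions below implement the usual bit-level identity
   copysign (x, y) = (x & ~SIGNMASK) | (y & SIGNMASK), with SIGNMASK
   built by ix86_build_signbit_mask.  When x is a constant it is turned
   into |x| up front, so the splitter only has to mask y's sign bit and
   IOR the constant magnitude back in.  */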
20884
20885 void
20886 ix86_expand_copysign (rtx operands[])
20887 {
20888 machine_mode mode, vmode;
20889 rtx dest, op0, op1, mask, nmask;
20890
20891 dest = operands[0];
20892 op0 = operands[1];
20893 op1 = operands[2];
20894
20895 mode = GET_MODE (dest);
20896
20897 if (mode == SFmode)
20898 vmode = V4SFmode;
20899 else if (mode == DFmode)
20900 vmode = V2DFmode;
20901 else
20902 vmode = mode;
20903
20904 if (CONST_DOUBLE_P (op0))
20905 {
20906 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
20907
20908 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
20909 op0 = simplify_unary_operation (ABS, mode, op0, mode);
20910
20911 if (mode == SFmode || mode == DFmode)
20912 {
20913 if (op0 == CONST0_RTX (mode))
20914 op0 = CONST0_RTX (vmode);
20915 else
20916 {
20917 rtx v = ix86_build_const_vector (vmode, false, op0);
20918
20919 op0 = force_reg (vmode, v);
20920 }
20921 }
20922 else if (op0 != CONST0_RTX (mode))
20923 op0 = force_reg (mode, op0);
20924
20925 mask = ix86_build_signbit_mask (vmode, 0, 0);
20926
20927 if (mode == SFmode)
20928 copysign_insn = gen_copysignsf3_const;
20929 else if (mode == DFmode)
20930 copysign_insn = gen_copysigndf3_const;
20931 else
20932 copysign_insn = gen_copysigntf3_const;
20933
20934 emit_insn (copysign_insn (dest, op0, op1, mask));
20935 }
20936 else
20937 {
20938 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
20939
20940 nmask = ix86_build_signbit_mask (vmode, 0, 1);
20941 mask = ix86_build_signbit_mask (vmode, 0, 0);
20942
20943 if (mode == SFmode)
20944 copysign_insn = gen_copysignsf3_var;
20945 else if (mode == DFmode)
20946 copysign_insn = gen_copysigndf3_var;
20947 else
20948 copysign_insn = gen_copysigntf3_var;
20949
20950 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
20951 }
20952 }
20953
20954 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
20955 be a constant, and so has already been expanded into a vector constant. */
20956
20957 void
20958 ix86_split_copysign_const (rtx operands[])
20959 {
20960 machine_mode mode, vmode;
20961 rtx dest, op0, mask, x;
20962
20963 dest = operands[0];
20964 op0 = operands[1];
20965 mask = operands[3];
20966
20967 mode = GET_MODE (dest);
20968 vmode = GET_MODE (mask);
20969
20970 dest = simplify_gen_subreg (vmode, dest, mode, 0);
20971 x = gen_rtx_AND (vmode, dest, mask);
20972 emit_insn (gen_rtx_SET (dest, x));
20973
20974 if (op0 != CONST0_RTX (vmode))
20975 {
20976 x = gen_rtx_IOR (vmode, dest, op0);
20977 emit_insn (gen_rtx_SET (dest, x));
20978 }
20979 }
20980
20981 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
20982 so we have to do two masks. */
20983
20984 void
20985 ix86_split_copysign_var (rtx operands[])
20986 {
20987 machine_mode mode, vmode;
20988 rtx dest, scratch, op0, op1, mask, nmask, x;
20989
20990 dest = operands[0];
20991 scratch = operands[1];
20992 op0 = operands[2];
20993 op1 = operands[3];
20994 nmask = operands[4];
20995 mask = operands[5];
20996
20997 mode = GET_MODE (dest);
20998 vmode = GET_MODE (mask);
20999
21000 if (rtx_equal_p (op0, op1))
21001 {
21002 /* Shouldn't happen often (it's useless, obviously), but when it does
21003 we'd generate incorrect code if we continue below. */
21004 emit_move_insn (dest, op0);
21005 return;
21006 }
21007
21008 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21009 {
21010 gcc_assert (REGNO (op1) == REGNO (scratch));
21011
21012 x = gen_rtx_AND (vmode, scratch, mask);
21013 emit_insn (gen_rtx_SET (scratch, x));
21014
21015 dest = mask;
21016 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21017 x = gen_rtx_NOT (vmode, dest);
21018 x = gen_rtx_AND (vmode, x, op0);
21019 emit_insn (gen_rtx_SET (dest, x));
21020 }
21021 else
21022 {
21023 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21024 {
21025 x = gen_rtx_AND (vmode, scratch, mask);
21026 }
21027 else /* alternative 2,4 */
21028 {
21029 gcc_assert (REGNO (mask) == REGNO (scratch));
21030 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
21031 x = gen_rtx_AND (vmode, scratch, op1);
21032 }
21033 emit_insn (gen_rtx_SET (scratch, x));
21034
21035 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21036 {
21037 dest = simplify_gen_subreg (vmode, op0, mode, 0);
21038 x = gen_rtx_AND (vmode, dest, nmask);
21039 }
21040 else /* alternative 3,4 */
21041 {
21042 gcc_assert (REGNO (nmask) == REGNO (dest));
21043 dest = nmask;
21044 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21045 x = gen_rtx_AND (vmode, dest, op0);
21046 }
21047 emit_insn (gen_rtx_SET (dest, x));
21048 }
21049
21050 x = gen_rtx_IOR (vmode, dest, scratch);
21051 emit_insn (gen_rtx_SET (dest, x));
21052 }
21053
21054 /* Return TRUE or FALSE depending on whether the first SET in INSN
21055 has source and destination with matching CC modes, and that the
21056 CC mode is at least as constrained as REQ_MODE. */
21057
21058 bool
21059 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21060 {
21061 rtx set;
21062 machine_mode set_mode;
21063
21064 set = PATTERN (insn);
21065 if (GET_CODE (set) == PARALLEL)
21066 set = XVECEXP (set, 0, 0);
21067 gcc_assert (GET_CODE (set) == SET);
21068 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21069
21070 set_mode = GET_MODE (SET_DEST (set));
21071 switch (set_mode)
21072 {
21073 case CCNOmode:
21074 if (req_mode != CCNOmode
21075 && (req_mode != CCmode
21076 || XEXP (SET_SRC (set), 1) != const0_rtx))
21077 return false;
21078 break;
21079 case CCmode:
21080 if (req_mode == CCGCmode)
21081 return false;
21082 /* FALLTHRU */
21083 case CCGCmode:
21084 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21085 return false;
21086 /* FALLTHRU */
21087 case CCGOCmode:
21088 if (req_mode == CCZmode)
21089 return false;
21090 /* FALLTHRU */
21091 case CCZmode:
21092 break;
21093
21094 case CCAmode:
21095 case CCCmode:
21096 case CCOmode:
21097 case CCPmode:
21098 case CCSmode:
21099 if (set_mode != req_mode)
21100 return false;
21101 break;
21102
21103 default:
21104 gcc_unreachable ();
21105 }
21106
21107 return GET_MODE (SET_SRC (set)) == set_mode;
21108 }
21109
21110 /* Generate insn patterns to do an integer compare of OPERANDS. */
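/* E.g. for (GTU, a, b) this emits (set (reg:CC flags) (compare:CC a b))
   and returns (gtu (reg:CC flags) (const_int 0)) for the caller to wrap
   into a jump, setcc or cmov.  */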
21111
21112 static rtx
21113 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21114 {
21115 machine_mode cmpmode;
21116 rtx tmp, flags;
21117
21118 cmpmode = SELECT_CC_MODE (code, op0, op1);
21119 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21120
21121 /* This is very simple, but making the interface the same as in the
21122 FP case makes the rest of the code easier. */
21123 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21124 emit_insn (gen_rtx_SET (flags, tmp));
21125
21126 /* Return the test that should be put into the flags user, i.e.
21127 the bcc, scc, or cmov instruction. */
21128 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21129 }
21130
21131 /* Figure out whether to use ordered or unordered fp comparisons.
21132 Return the appropriate mode to use. */
21133
21134 machine_mode
21135 ix86_fp_compare_mode (enum rtx_code)
21136 {
21137 /* ??? In order to make all comparisons reversible, we do all comparisons
21138 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21139 all forms of trapping and nontrapping comparisons, we can make inequality
21140 comparisons trapping again, since that results in better code when using
21141 FCOM-based compares. */
21142 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
21143 }
21144
21145 machine_mode
21146 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21147 {
21148 machine_mode mode = GET_MODE (op0);
21149
21150 if (SCALAR_FLOAT_MODE_P (mode))
21151 {
21152 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21153 return ix86_fp_compare_mode (code);
21154 }
21155
21156 switch (code)
21157 {
21158 /* Only zero flag is needed. */
21159 case EQ: /* ZF=0 */
21160 case NE: /* ZF!=0 */
21161 return CCZmode;
21162 /* Codes needing carry flag. */
21163 case GEU: /* CF=0 */
21164 case LTU: /* CF=1 */
21165 /* Detect overflow checks. They need just the carry flag. */
21166 if (GET_CODE (op0) == PLUS
21167 && (rtx_equal_p (op1, XEXP (op0, 0))
21168 || rtx_equal_p (op1, XEXP (op0, 1))))
21169 return CCCmode;
21170 else
21171 return CCmode;
21172 case GTU: /* CF=0 & ZF=0 */
21173 case LEU: /* CF=1 | ZF=1 */
21174 return CCmode;
21175 /* Codes possibly doable only with sign flag when
21176 comparing against zero. */
21177 case GE: /* SF=OF or SF=0 */
21178 case LT: /* SF<>OF or SF=1 */
21179 if (op1 == const0_rtx)
21180 return CCGOCmode;
21181 else
21182 /* For other cases Carry flag is not required. */
21183 return CCGCmode;
21184 /* Codes doable only with the sign flag when comparing
21185 against zero, but we lack the jump instruction for it,
21186 so we need to use relational tests against overflow,
21187 which thus needs to be zero. */
21188 case GT: /* ZF=0 & SF=OF */
21189 case LE: /* ZF=1 | SF<>OF */
21190 if (op1 == const0_rtx)
21191 return CCNOmode;
21192 else
21193 return CCGCmode;
21194 /* The strcmp pattern does (use flags) and combine may ask us for the
21195 proper mode. */
21196 case USE:
21197 return CCmode;
21198 default:
21199 gcc_unreachable ();
21200 }
21201 }
21202
21203 /* Return the fixed registers used for condition codes. */
21204
21205 static bool
21206 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21207 {
21208 *p1 = FLAGS_REG;
21209 *p2 = FPSR_REG;
21210 return true;
21211 }
21212
21213 /* If two condition code modes are compatible, return a condition code
21214 mode which is compatible with both. Otherwise, return
21215 VOIDmode. */
21216
21217 static machine_mode
21218 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
21219 {
21220 if (m1 == m2)
21221 return m1;
21222
21223 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
21224 return VOIDmode;
21225
21226 if ((m1 == CCGCmode && m2 == CCGOCmode)
21227 || (m1 == CCGOCmode && m2 == CCGCmode))
21228 return CCGCmode;
21229
21230 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
21231 return m2;
21232 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
21233 return m1;
21234
21235 switch (m1)
21236 {
21237 default:
21238 gcc_unreachable ();
21239
21240 case CCmode:
21241 case CCGCmode:
21242 case CCGOCmode:
21243 case CCNOmode:
21244 case CCAmode:
21245 case CCCmode:
21246 case CCOmode:
21247 case CCPmode:
21248 case CCSmode:
21249 case CCZmode:
21250 switch (m2)
21251 {
21252 default:
21253 return VOIDmode;
21254
21255 case CCmode:
21256 case CCGCmode:
21257 case CCGOCmode:
21258 case CCNOmode:
21259 case CCAmode:
21260 case CCCmode:
21261 case CCOmode:
21262 case CCPmode:
21263 case CCSmode:
21264 case CCZmode:
21265 return CCmode;
21266 }
21267
21268 case CCFPmode:
21269 case CCFPUmode:
21270 /* These are only compatible with themselves, which we already
21271 checked above. */
21272 return VOIDmode;
21273 }
21274 }
21275
21276
21277 /* Return a comparison we can do and that it is equivalent to
21278 swap_condition (code) apart possibly from orderedness.
21279 But, never change orderedness if TARGET_IEEE_FP, returning
21280 UNKNOWN in that case if necessary. */
21281
21282 static enum rtx_code
21283 ix86_fp_swap_condition (enum rtx_code code)
21284 {
21285 switch (code)
21286 {
21287 case GT: /* GTU - CF=0 & ZF=0 */
21288 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21289 case GE: /* GEU - CF=0 */
21290 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21291 case UNLT: /* LTU - CF=1 */
21292 return TARGET_IEEE_FP ? UNKNOWN : GT;
21293 case UNLE: /* LEU - CF=1 | ZF=1 */
21294 return TARGET_IEEE_FP ? UNKNOWN : GE;
21295 default:
21296 return swap_condition (code);
21297 }
21298 }
21299
21300 /* Return the cost of comparison CODE using the best strategy for performance.
21301 All of the following functions use the number of instructions as the cost metric.
21302 In the future this should be tweaked to compute bytes for optimize_size and
21303 to take into account the performance of various instructions on various CPUs. */
21304
21305 static int
21306 ix86_fp_comparison_cost (enum rtx_code code)
21307 {
21308 int arith_cost;
21309
21310 /* The cost of code using bit-twiddling on %ah. */
21311 switch (code)
21312 {
21313 case UNLE:
21314 case UNLT:
21315 case LTGT:
21316 case GT:
21317 case GE:
21318 case UNORDERED:
21319 case ORDERED:
21320 case UNEQ:
21321 arith_cost = 4;
21322 break;
21323 case LT:
21324 case NE:
21325 case EQ:
21326 case UNGE:
21327 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21328 break;
21329 case LE:
21330 case UNGT:
21331 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21332 break;
21333 default:
21334 gcc_unreachable ();
21335 }
21336
21337 switch (ix86_fp_comparison_strategy (code))
21338 {
21339 case IX86_FPCMP_COMI:
21340 return arith_cost > 4 ? 3 : 2;
21341 case IX86_FPCMP_SAHF:
21342 return arith_cost > 4 ? 4 : 3;
21343 default:
21344 return arith_cost;
21345 }
21346 }
21347
21348 /* Return the strategy to use for a floating-point comparison. We assume that
21349 fcomi is always preferable where available, since that is also true when looking
21350 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
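/* In outline: IX86_FPCMP_COMI uses fcomi/fucomi (or their SSE counterparts
comiss/ucomiss), which write ZF, PF and CF in EFLAGS directly;
IX86_FPCMP_SAHF stores the FPU status word with fnstsw %ax and copies AH
into the flags with sahf; IX86_FPCMP_ARITH also uses fnstsw but then tests
bits of AH directly, as done in ix86_expand_fp_compare below. */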
21351
21352 enum ix86_fpcmp_strategy
21353 ix86_fp_comparison_strategy (enum rtx_code)
21354 {
21355 /* Do fcomi/sahf based test when profitable. */
21356
21357 if (TARGET_CMOVE)
21358 return IX86_FPCMP_COMI;
21359
21360 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21361 return IX86_FPCMP_SAHF;
21362
21363 return IX86_FPCMP_ARITH;
21364 }
21365
21366 /* Swap, force into registers, or otherwise massage the two operands
21367 of a floating-point comparison. The operands are updated in place; the new
21368 comparison code is returned. */
21369
21370 static enum rtx_code
21371 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21372 {
21373 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
21374 rtx op0 = *pop0, op1 = *pop1;
21375 machine_mode op_mode = GET_MODE (op0);
21376 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21377
21378 /* All of the unordered compare instructions only work on registers.
21379 The same is true of the fcomi compare instructions. The XFmode
21380 compare instructions require registers except when comparing
21381 against zero or when converting operand 1 from fixed point to
21382 floating point. */
21383
21384 if (!is_sse
21385 && (fpcmp_mode == CCFPUmode
21386 || (op_mode == XFmode
21387 && ! (standard_80387_constant_p (op0) == 1
21388 || standard_80387_constant_p (op1) == 1)
21389 && GET_CODE (op1) != FLOAT)
21390 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
21391 {
21392 op0 = force_reg (op_mode, op0);
21393 op1 = force_reg (op_mode, op1);
21394 }
21395 else
21396 {
21397 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21398 things around if they appear profitable, otherwise force op0
21399 into a register. */
21400
21401 if (standard_80387_constant_p (op0) == 0
21402 || (MEM_P (op0)
21403 && ! (standard_80387_constant_p (op1) == 0
21404 || MEM_P (op1))))
21405 {
21406 enum rtx_code new_code = ix86_fp_swap_condition (code);
21407 if (new_code != UNKNOWN)
21408 {
21409 std::swap (op0, op1);
21410 code = new_code;
21411 }
21412 }
21413
21414 if (!REG_P (op0))
21415 op0 = force_reg (op_mode, op0);
21416
21417 if (CONSTANT_P (op1))
21418 {
21419 int tmp = standard_80387_constant_p (op1);
21420 if (tmp == 0)
21421 op1 = validize_mem (force_const_mem (op_mode, op1));
21422 else if (tmp == 1)
21423 {
21424 if (TARGET_CMOVE)
21425 op1 = force_reg (op_mode, op1);
21426 }
21427 else
21428 op1 = force_reg (op_mode, op1);
21429 }
21430 }
21431
21432 /* Try to rearrange the comparison to make it cheaper. */
21433 if (ix86_fp_comparison_cost (code)
21434 > ix86_fp_comparison_cost (swap_condition (code))
21435 && (REG_P (op1) || can_create_pseudo_p ()))
21436 {
21437 std::swap (op0, op1);
21438 code = swap_condition (code);
21439 if (!REG_P (op0))
21440 op0 = force_reg (op_mode, op0);
21441 }
21442
21443 *pop0 = op0;
21444 *pop1 = op1;
21445 return code;
21446 }
21447
21448 /* Convert the comparison codes we use to represent an FP comparison into the
21449 integer code that will result in a proper branch. Return UNKNOWN if no such
21450 code is available. */
21451
21452 enum rtx_code
21453 ix86_fp_compare_code_to_integer (enum rtx_code code)
21454 {
21455 switch (code)
21456 {
21457 case GT:
21458 return GTU;
21459 case GE:
21460 return GEU;
21461 case ORDERED:
21462 case UNORDERED:
21463 return code;
21464 break;
21465 case UNEQ:
21466 return EQ;
21467 break;
21468 case UNLT:
21469 return LTU;
21470 break;
21471 case UNLE:
21472 return LEU;
21473 break;
21474 case LTGT:
21475 return NE;
21476 break;
21477 default:
21478 return UNKNOWN;
21479 }
21480 }
21481
21482 /* Generate insn patterns to do a floating point compare of OPERANDS. */
21483
21484 static rtx
21485 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
21486 {
21487 machine_mode fpcmp_mode, intcmp_mode;
21488 rtx tmp, tmp2;
21489
21490 fpcmp_mode = ix86_fp_compare_mode (code);
21491 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
21492
21493 /* Do fcomi/sahf based test when profitable. */
21494 switch (ix86_fp_comparison_strategy (code))
21495 {
21496 case IX86_FPCMP_COMI:
21497 intcmp_mode = fpcmp_mode;
21498 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21499 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21500 emit_insn (tmp);
21501 break;
21502
21503 case IX86_FPCMP_SAHF:
21504 intcmp_mode = fpcmp_mode;
21505 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21506 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21507
21508 if (!scratch)
21509 scratch = gen_reg_rtx (HImode);
21510 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
21511 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
21512 break;
21513
21514 case IX86_FPCMP_ARITH:
21515 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
21516 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21517 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
21518 if (!scratch)
21519 scratch = gen_reg_rtx (HImode);
21520 emit_insn (gen_rtx_SET (scratch, tmp2));
21521
21522 /* In the unordered case, we have to check C2 for NaNs, which
21523 doesn't happen to work out to anything nice combination-wise.
21524 So do some bit twiddling on the value we've got in AH to come
21525 up with an appropriate set of condition codes. */
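/* The masks used below come from the layout of the x87 status word in AH
after the fnstsw: C0 is bit 0 (0x01), C2 is bit 2 (0x04) and C3 is bit 6
(0x40). A compare sets C3=C2=C0=0 for "greater", C0=1 for "less", C3=1 for
"equal" and C3=C2=C0=1 for "unordered", so e.g. 0x45 selects all three
condition-code bits and 0x05 selects C2|C0. */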
21526
21527 intcmp_mode = CCNOmode;
21528 switch (code)
21529 {
21530 case GT:
21531 case UNGT:
21532 if (code == GT || !TARGET_IEEE_FP)
21533 {
21534 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21535 code = EQ;
21536 }
21537 else
21538 {
21539 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21540 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21541 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
21542 intcmp_mode = CCmode;
21543 code = GEU;
21544 }
21545 break;
21546 case LT:
21547 case UNLT:
21548 if (code == LT && TARGET_IEEE_FP)
21549 {
21550 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21551 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
21552 intcmp_mode = CCmode;
21553 code = EQ;
21554 }
21555 else
21556 {
21557 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
21558 code = NE;
21559 }
21560 break;
21561 case GE:
21562 case UNGE:
21563 if (code == GE || !TARGET_IEEE_FP)
21564 {
21565 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
21566 code = EQ;
21567 }
21568 else
21569 {
21570 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21571 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
21572 code = NE;
21573 }
21574 break;
21575 case LE:
21576 case UNLE:
21577 if (code == LE && TARGET_IEEE_FP)
21578 {
21579 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21580 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21581 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21582 intcmp_mode = CCmode;
21583 code = LTU;
21584 }
21585 else
21586 {
21587 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21588 code = NE;
21589 }
21590 break;
21591 case EQ:
21592 case UNEQ:
21593 if (code == EQ && TARGET_IEEE_FP)
21594 {
21595 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21596 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21597 intcmp_mode = CCmode;
21598 code = EQ;
21599 }
21600 else
21601 {
21602 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21603 code = NE;
21604 }
21605 break;
21606 case NE:
21607 case LTGT:
21608 if (code == NE && TARGET_IEEE_FP)
21609 {
21610 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21611 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
21612 GEN_INT (0x40)));
21613 code = NE;
21614 }
21615 else
21616 {
21617 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21618 code = EQ;
21619 }
21620 break;
21621
21622 case UNORDERED:
21623 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21624 code = NE;
21625 break;
21626 case ORDERED:
21627 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21628 code = EQ;
21629 break;
21630
21631 default:
21632 gcc_unreachable ();
21633 }
21634 break;
21635
21636 default:
21637 gcc_unreachable ();
21638 }
21639
21640 /* Return the test that should be put into the flags user, i.e.
21641 the bcc, scc, or cmov instruction. */
21642 return gen_rtx_fmt_ee (code, VOIDmode,
21643 gen_rtx_REG (intcmp_mode, FLAGS_REG),
21644 const0_rtx);
21645 }
21646
21647 static rtx
21648 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
21649 {
21650 rtx ret;
21651
21652 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
21653 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
21654
21655 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
21656 {
21657 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
21658 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21659 }
21660 else
21661 ret = ix86_expand_int_compare (code, op0, op1);
21662
21663 return ret;
21664 }
21665
21666 void
21667 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
21668 {
21669 machine_mode mode = GET_MODE (op0);
21670 rtx tmp;
21671
21672 switch (mode)
21673 {
21674 case SFmode:
21675 case DFmode:
21676 case XFmode:
21677 case QImode:
21678 case HImode:
21679 case SImode:
21680 simple:
21681 tmp = ix86_expand_compare (code, op0, op1);
21682 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21683 gen_rtx_LABEL_REF (VOIDmode, label),
21684 pc_rtx);
21685 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
21686 return;
21687
21688 case DImode:
21689 if (TARGET_64BIT)
21690 goto simple;
21691 case TImode:
21692 /* Expand DImode branch into multiple compare+branch. */
21693 {
21694 rtx lo[2], hi[2];
21695 rtx_code_label *label2;
21696 enum rtx_code code1, code2, code3;
21697 machine_mode submode;
21698
21699 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
21700 {
21701 std::swap (op0, op1);
21702 code = swap_condition (code);
21703 }
21704
21705 split_double_mode (mode, &op0, 1, lo+0, hi+0);
21706 split_double_mode (mode, &op1, 1, lo+1, hi+1);
21707
21708 submode = mode == DImode ? SImode : DImode;
21709
21710 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
21711 avoid two branches. This costs one extra insn, so disable when
21712 optimizing for size. */
21713
21714 if ((code == EQ || code == NE)
21715 && (!optimize_insn_for_size_p ()
21716 || hi[1] == const0_rtx || lo[1] == const0_rtx))
21717 {
21718 rtx xor0, xor1;
21719
21720 xor1 = hi[0];
21721 if (hi[1] != const0_rtx)
21722 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
21723 NULL_RTX, 0, OPTAB_WIDEN);
21724
21725 xor0 = lo[0];
21726 if (lo[1] != const0_rtx)
21727 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
21728 NULL_RTX, 0, OPTAB_WIDEN);
21729
21730 tmp = expand_binop (submode, ior_optab, xor1, xor0,
21731 NULL_RTX, 0, OPTAB_WIDEN);
21732
21733 ix86_expand_branch (code, tmp, const0_rtx, label);
21734 return;
21735 }
21736
21737 /* Otherwise, if we are doing less-than or greater-or-equal-than,
21738 op1 is a constant and its low word is zero, then we can just
21739 examine the high word. Similarly for a low word of -1 and
21740 less-or-equal-than or greater-than. */
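/* For example, with unsigned DImode operands on a 32-bit target,
a < 0x100000000 holds exactly when hi(a) < 1, and a <= 0xffffffff holds
exactly when hi(a) <= 0. */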
21741
21742 if (CONST_INT_P (hi[1]))
21743 switch (code)
21744 {
21745 case LT: case LTU: case GE: case GEU:
21746 if (lo[1] == const0_rtx)
21747 {
21748 ix86_expand_branch (code, hi[0], hi[1], label);
21749 return;
21750 }
21751 break;
21752 case LE: case LEU: case GT: case GTU:
21753 if (lo[1] == constm1_rtx)
21754 {
21755 ix86_expand_branch (code, hi[0], hi[1], label);
21756 return;
21757 }
21758 break;
21759 default:
21760 break;
21761 }
21762
21763 /* Otherwise, we need two or three jumps. */
21764
21765 label2 = gen_label_rtx ();
21766
21767 code1 = code;
21768 code2 = swap_condition (code);
21769 code3 = unsigned_condition (code);
21770
21771 switch (code)
21772 {
21773 case LT: case GT: case LTU: case GTU:
21774 break;
21775
21776 case LE: code1 = LT; code2 = GT; break;
21777 case GE: code1 = GT; code2 = LT; break;
21778 case LEU: code1 = LTU; code2 = GTU; break;
21779 case GEU: code1 = GTU; code2 = LTU; break;
21780
21781 case EQ: code1 = UNKNOWN; code2 = NE; break;
21782 case NE: code2 = UNKNOWN; break;
21783
21784 default:
21785 gcc_unreachable ();
21786 }
21787
21788 /*
21789 * a < b =>
21790 * if (hi(a) < hi(b)) goto true;
21791 * if (hi(a) > hi(b)) goto false;
21792 * if (lo(a) < lo(b)) goto true;
21793 * false:
21794 */
21795
21796 if (code1 != UNKNOWN)
21797 ix86_expand_branch (code1, hi[0], hi[1], label);
21798 if (code2 != UNKNOWN)
21799 ix86_expand_branch (code2, hi[0], hi[1], label2);
21800
21801 ix86_expand_branch (code3, lo[0], lo[1], label);
21802
21803 if (code2 != UNKNOWN)
21804 emit_label (label2);
21805 return;
21806 }
21807
21808 default:
21809 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
21810 goto simple;
21811 }
21812 }
21813
21814 /* Split branch based on floating point condition. */
21815 void
21816 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
21817 rtx target1, rtx target2, rtx tmp)
21818 {
21819 rtx condition;
21820 rtx i;
21821
21822 if (target2 != pc_rtx)
21823 {
21824 std::swap (target1, target2);
21825 code = reverse_condition_maybe_unordered (code);
21826 }
21827
21828 condition = ix86_expand_fp_compare (code, op1, op2,
21829 tmp);
21830
21831 i = emit_jump_insn (gen_rtx_SET
21832 (pc_rtx,
21833 gen_rtx_IF_THEN_ELSE (VOIDmode,
21834 condition, target1, target2)));
21835 if (split_branch_probability >= 0)
21836 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
21837 }
21838
21839 void
21840 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21841 {
21842 rtx ret;
21843
21844 gcc_assert (GET_MODE (dest) == QImode);
21845
21846 ret = ix86_expand_compare (code, op0, op1);
21847 PUT_MODE (ret, QImode);
21848 emit_insn (gen_rtx_SET (dest, ret));
21849 }
21850
21851 /* Expand a comparison setting or clearing the carry flag. Return true when
21852 successful and set *POP to the comparison operation. */
21853 static bool
21854 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
21855 {
21856 machine_mode mode =
21857 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
21858
21859 /* Do not handle double-mode compares that go through special path. */
21860 if (mode == (TARGET_64BIT ? TImode : DImode))
21861 return false;
21862
21863 if (SCALAR_FLOAT_MODE_P (mode))
21864 {
21865 rtx compare_op;
21866 rtx_insn *compare_seq;
21867
21868 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21869
21870 /* Shortcut: the following common codes never translate
21871 into carry flag compares. */
21872 if (code == EQ || code == NE || code == UNEQ || code == LTGT
21873 || code == ORDERED || code == UNORDERED)
21874 return false;
21875
21876 /* These comparisons require the zero flag; swap the operands so they won't. */
21877 if ((code == GT || code == UNLE || code == LE || code == UNGT)
21878 && !TARGET_IEEE_FP)
21879 {
21880 std::swap (op0, op1);
21881 code = swap_condition (code);
21882 }
21883
21884 /* Try to expand the comparison and verify that we end up with a
21885 carry-flag-based comparison. This fails to be true only when
21886 we decide to expand the comparison using arithmetic, which is not
21887 a very common scenario. */
21888 start_sequence ();
21889 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21890 compare_seq = get_insns ();
21891 end_sequence ();
21892
21893 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
21894 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
21895 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
21896 else
21897 code = GET_CODE (compare_op);
21898
21899 if (code != LTU && code != GEU)
21900 return false;
21901
21902 emit_insn (compare_seq);
21903 *pop = compare_op;
21904 return true;
21905 }
21906
21907 if (!INTEGRAL_MODE_P (mode))
21908 return false;
21909
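/* After a compare, the carry flag is set exactly when the first operand is
below the second as an unsigned value, so only LTU and GEU map directly
onto it; the cases below rewrite the other codes into that form where
possible. */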
21910 switch (code)
21911 {
21912 case LTU:
21913 case GEU:
21914 break;
21915
21916 /* Convert a==0 into (unsigned)a<1. */
21917 case EQ:
21918 case NE:
21919 if (op1 != const0_rtx)
21920 return false;
21921 op1 = const1_rtx;
21922 code = (code == EQ ? LTU : GEU);
21923 break;
21924
21925 /* Convert a>b into b<a or a>=b-1. */
21926 case GTU:
21927 case LEU:
21928 if (CONST_INT_P (op1))
21929 {
21930 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
21931 /* Bail out on overflow. We could still swap the operands, but that
21932 would force loading the constant into a register. */
21933 if (op1 == const0_rtx
21934 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
21935 return false;
21936 code = (code == GTU ? GEU : LTU);
21937 }
21938 else
21939 {
21940 std::swap (op0, op1);
21941 code = (code == GTU ? LTU : GEU);
21942 }
21943 break;
21944
21945 /* Convert a>=0 into (unsigned)a<0x80000000. */
21946 case LT:
21947 case GE:
21948 if (mode == DImode || op1 != const0_rtx)
21949 return false;
21950 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21951 code = (code == LT ? GEU : LTU);
21952 break;
21953 case LE:
21954 case GT:
21955 if (mode == DImode || op1 != constm1_rtx)
21956 return false;
21957 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21958 code = (code == LE ? GEU : LTU);
21959 break;
21960
21961 default:
21962 return false;
21963 }
21964 /* Swapping operands may cause a constant to appear as the first operand. */
21965 if (!nonimmediate_operand (op0, VOIDmode))
21966 {
21967 if (!can_create_pseudo_p ())
21968 return false;
21969 op0 = force_reg (mode, op0);
21970 }
21971 *pop = ix86_expand_compare (code, op0, op1);
21972 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
21973 return true;
21974 }
21975
21976 bool
21977 ix86_expand_int_movcc (rtx operands[])
21978 {
21979 enum rtx_code code = GET_CODE (operands[1]), compare_code;
21980 rtx_insn *compare_seq;
21981 rtx compare_op;
21982 machine_mode mode = GET_MODE (operands[0]);
21983 bool sign_bit_compare_p = false;
21984 rtx op0 = XEXP (operands[1], 0);
21985 rtx op1 = XEXP (operands[1], 1);
21986
21987 if (GET_MODE (op0) == TImode
21988 || (GET_MODE (op0) == DImode
21989 && !TARGET_64BIT))
21990 return false;
21991
21992 start_sequence ();
21993 compare_op = ix86_expand_compare (code, op0, op1);
21994 compare_seq = get_insns ();
21995 end_sequence ();
21996
21997 compare_code = GET_CODE (compare_op);
21998
21999 if ((op1 == const0_rtx && (code == GE || code == LT))
22000 || (op1 == constm1_rtx && (code == GT || code == LE)))
22001 sign_bit_compare_p = true;
22002
22003 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22004 HImode insns, we'd be swallowed in word prefix ops. */
22005
22006 if ((mode != HImode || TARGET_FAST_PREFIX)
22007 && (mode != (TARGET_64BIT ? TImode : DImode))
22008 && CONST_INT_P (operands[2])
22009 && CONST_INT_P (operands[3]))
22010 {
22011 rtx out = operands[0];
22012 HOST_WIDE_INT ct = INTVAL (operands[2]);
22013 HOST_WIDE_INT cf = INTVAL (operands[3]);
22014 HOST_WIDE_INT diff;
22015
22016 diff = ct - cf;
22017 /* Sign-bit compares are better done using shifts than by using
22018 sbb. */
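/* The sbb idiom in the sequences below relies on "sbb reg,reg" producing
-CF, i.e. all ones when the preceding compare set the carry flag and zero
otherwise; a sign-bit compare obtains the same 0/-1 mask from an arithmetic
right shift instead. */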
22019 if (sign_bit_compare_p
22020 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22021 {
22022 /* Detect overlap between destination and compare sources. */
22023 rtx tmp = out;
22024
22025 if (!sign_bit_compare_p)
22026 {
22027 rtx flags;
22028 bool fpcmp = false;
22029
22030 compare_code = GET_CODE (compare_op);
22031
22032 flags = XEXP (compare_op, 0);
22033
22034 if (GET_MODE (flags) == CCFPmode
22035 || GET_MODE (flags) == CCFPUmode)
22036 {
22037 fpcmp = true;
22038 compare_code
22039 = ix86_fp_compare_code_to_integer (compare_code);
22040 }
22041
22042 /* To simplify the rest of the code, restrict to the GEU case. */
22043 if (compare_code == LTU)
22044 {
22045 std::swap (ct, cf);
22046 compare_code = reverse_condition (compare_code);
22047 code = reverse_condition (code);
22048 }
22049 else
22050 {
22051 if (fpcmp)
22052 PUT_CODE (compare_op,
22053 reverse_condition_maybe_unordered
22054 (GET_CODE (compare_op)));
22055 else
22056 PUT_CODE (compare_op,
22057 reverse_condition (GET_CODE (compare_op)));
22058 }
22059 diff = ct - cf;
22060
22061 if (reg_overlap_mentioned_p (out, op0)
22062 || reg_overlap_mentioned_p (out, op1))
22063 tmp = gen_reg_rtx (mode);
22064
22065 if (mode == DImode)
22066 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22067 else
22068 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22069 flags, compare_op));
22070 }
22071 else
22072 {
22073 if (code == GT || code == GE)
22074 code = reverse_condition (code);
22075 else
22076 {
22077 std::swap (ct, cf);
22078 diff = ct - cf;
22079 }
22080 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22081 }
22082
22083 if (diff == 1)
22084 {
22085 /*
22086 * cmpl op0,op1
22087 * sbbl dest,dest
22088 * [addl dest, ct]
22089 *
22090 * Size 5 - 8.
22091 */
22092 if (ct)
22093 tmp = expand_simple_binop (mode, PLUS,
22094 tmp, GEN_INT (ct),
22095 copy_rtx (tmp), 1, OPTAB_DIRECT);
22096 }
22097 else if (cf == -1)
22098 {
22099 /*
22100 * cmpl op0,op1
22101 * sbbl dest,dest
22102 * orl $ct, dest
22103 *
22104 * Size 8.
22105 */
22106 tmp = expand_simple_binop (mode, IOR,
22107 tmp, GEN_INT (ct),
22108 copy_rtx (tmp), 1, OPTAB_DIRECT);
22109 }
22110 else if (diff == -1 && ct)
22111 {
22112 /*
22113 * cmpl op0,op1
22114 * sbbl dest,dest
22115 * notl dest
22116 * [addl dest, cf]
22117 *
22118 * Size 8 - 11.
22119 */
22120 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22121 if (cf)
22122 tmp = expand_simple_binop (mode, PLUS,
22123 copy_rtx (tmp), GEN_INT (cf),
22124 copy_rtx (tmp), 1, OPTAB_DIRECT);
22125 }
22126 else
22127 {
22128 /*
22129 * cmpl op0,op1
22130 * sbbl dest,dest
22131 * [notl dest]
22132 * andl cf - ct, dest
22133 * [addl dest, ct]
22134 *
22135 * Size 8 - 11.
22136 */
22137
22138 if (cf == 0)
22139 {
22140 cf = ct;
22141 ct = 0;
22142 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22143 }
22144
22145 tmp = expand_simple_binop (mode, AND,
22146 copy_rtx (tmp),
22147 gen_int_mode (cf - ct, mode),
22148 copy_rtx (tmp), 1, OPTAB_DIRECT);
22149 if (ct)
22150 tmp = expand_simple_binop (mode, PLUS,
22151 copy_rtx (tmp), GEN_INT (ct),
22152 copy_rtx (tmp), 1, OPTAB_DIRECT);
22153 }
22154
22155 if (!rtx_equal_p (tmp, out))
22156 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22157
22158 return true;
22159 }
22160
22161 if (diff < 0)
22162 {
22163 machine_mode cmp_mode = GET_MODE (op0);
22164 enum rtx_code new_code;
22165
22166 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22167 {
22168 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22169
22170 /* We may be reversing an unordered compare to a normal compare, which
22171 is not valid in general (we may convert a non-trapping condition
22172 to a trapping one); however, on i386 we currently emit all
22173 comparisons unordered. */
22174 new_code = reverse_condition_maybe_unordered (code);
22175 }
22176 else
22177 new_code = ix86_reverse_condition (code, cmp_mode);
22178 if (new_code != UNKNOWN)
22179 {
22180 std::swap (ct, cf);
22181 diff = -diff;
22182 code = new_code;
22183 }
22184 }
22185
22186 compare_code = UNKNOWN;
22187 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22188 && CONST_INT_P (op1))
22189 {
22190 if (op1 == const0_rtx
22191 && (code == LT || code == GE))
22192 compare_code = code;
22193 else if (op1 == constm1_rtx)
22194 {
22195 if (code == LE)
22196 compare_code = LT;
22197 else if (code == GT)
22198 compare_code = GE;
22199 }
22200 }
22201
22202 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22203 if (compare_code != UNKNOWN
22204 && GET_MODE (op0) == GET_MODE (out)
22205 && (cf == -1 || ct == -1))
22206 {
22207 /* If the lea code below could be used, only optimize
22208 if it results in a 2-insn sequence. */
22209
22210 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22211 || diff == 3 || diff == 5 || diff == 9)
22212 || (compare_code == LT && ct == -1)
22213 || (compare_code == GE && cf == -1))
22214 {
22215 /*
22216 * notl op1 (if necessary)
22217 * sarl $31, op1
22218 * orl cf, op1
22219 */
22220 if (ct != -1)
22221 {
22222 cf = ct;
22223 ct = -1;
22224 code = reverse_condition (code);
22225 }
22226
22227 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22228
22229 out = expand_simple_binop (mode, IOR,
22230 out, GEN_INT (cf),
22231 out, 1, OPTAB_DIRECT);
22232 if (out != operands[0])
22233 emit_move_insn (operands[0], out);
22234
22235 return true;
22236 }
22237 }
22238
22239
22240 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22241 || diff == 3 || diff == 5 || diff == 9)
22242 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22243 && (mode != DImode
22244 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22245 {
22246 /*
22247 * xorl dest,dest
22248 * cmpl op1,op2
22249 * setcc dest
22250 * lea cf(dest*(ct-cf)),dest
22251 *
22252 * Size 14.
22253 *
22254 * This also catches the degenerate setcc-only case.
22255 */
22256
22257 rtx tmp;
22258 int nops;
22259
22260 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22261
22262 nops = 0;
22263 /* On x86_64 the lea instruction operates on Pmode, so we need
22264 to get the arithmetic done in the proper mode to match. */
22265 if (diff == 1)
22266 tmp = copy_rtx (out);
22267 else
22268 {
22269 rtx out1;
22270 out1 = copy_rtx (out);
22271 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22272 nops++;
22273 if (diff & 1)
22274 {
22275 tmp = gen_rtx_PLUS (mode, tmp, out1);
22276 nops++;
22277 }
22278 }
22279 if (cf != 0)
22280 {
22281 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22282 nops++;
22283 }
22284 if (!rtx_equal_p (tmp, out))
22285 {
22286 if (nops == 1)
22287 out = force_operand (tmp, copy_rtx (out));
22288 else
22289 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22290 }
22291 if (!rtx_equal_p (out, operands[0]))
22292 emit_move_insn (operands[0], copy_rtx (out));
22293
22294 return true;
22295 }
22296
22297 /*
22298 * General case: Jumpful:
22299 * xorl dest,dest cmpl op1, op2
22300 * cmpl op1, op2 movl ct, dest
22301 * setcc dest jcc 1f
22302 * decl dest movl cf, dest
22303 * andl (cf-ct),dest 1:
22304 * addl ct,dest
22305 *
22306 * Size 20. Size 14.
22307 *
22308 * This is reasonably steep, but branch mispredict costs are
22309 * high on modern cpus, so consider failing only if optimizing
22310 * for space.
22311 */
22312
22313 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22314 && BRANCH_COST (optimize_insn_for_speed_p (),
22315 false) >= 2)
22316 {
22317 if (cf == 0)
22318 {
22319 machine_mode cmp_mode = GET_MODE (op0);
22320 enum rtx_code new_code;
22321
22322 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22323 {
22324 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22325
22326 /* We may be reversing an unordered compare to a normal compare,
22327 which is not valid in general (we may convert a non-trapping
22328 condition to a trapping one); however, on i386 we currently
22329 emit all comparisons unordered. */
22330 new_code = reverse_condition_maybe_unordered (code);
22331 }
22332 else
22333 {
22334 new_code = ix86_reverse_condition (code, cmp_mode);
22335 if (compare_code != UNKNOWN && new_code != UNKNOWN)
22336 compare_code = reverse_condition (compare_code);
22337 }
22338
22339 if (new_code != UNKNOWN)
22340 {
22341 cf = ct;
22342 ct = 0;
22343 code = new_code;
22344 }
22345 }
22346
22347 if (compare_code != UNKNOWN)
22348 {
22349 /* notl op1 (if needed)
22350 sarl $31, op1
22351 andl (cf-ct), op1
22352 addl ct, op1
22353
22354 For x < 0 (resp. x <= -1) there will be no notl,
22355 so if possible swap the constants to get rid of the
22356 complement.
22357 True/false will be -1/0 while code below (store flag
22358 followed by decrement) is 0/-1, so the constants need
22359 to be exchanged once more. */
22360
22361 if (compare_code == GE || !cf)
22362 {
22363 code = reverse_condition (code);
22364 compare_code = LT;
22365 }
22366 else
22367 std::swap (ct, cf);
22368
22369 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22370 }
22371 else
22372 {
22373 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22374
22375 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
22376 constm1_rtx,
22377 copy_rtx (out), 1, OPTAB_DIRECT);
22378 }
22379
22380 out = expand_simple_binop (mode, AND, copy_rtx (out),
22381 gen_int_mode (cf - ct, mode),
22382 copy_rtx (out), 1, OPTAB_DIRECT);
22383 if (ct)
22384 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
22385 copy_rtx (out), 1, OPTAB_DIRECT);
22386 if (!rtx_equal_p (out, operands[0]))
22387 emit_move_insn (operands[0], copy_rtx (out));
22388
22389 return true;
22390 }
22391 }
22392
22393 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22394 {
22395 /* Try a few things more with specific constants and a variable. */
22396
22397 optab op;
22398 rtx var, orig_out, out, tmp;
22399
22400 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
22401 return false;
22402
22403 /* If one of the two operands is an interesting constant, load a
22404 constant using the code above and mask the variable in with a logical operation. */
22405
22406 if (CONST_INT_P (operands[2]))
22407 {
22408 var = operands[3];
22409 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
22410 operands[3] = constm1_rtx, op = and_optab;
22411 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
22412 operands[3] = const0_rtx, op = ior_optab;
22413 else
22414 return false;
22415 }
22416 else if (CONST_INT_P (operands[3]))
22417 {
22418 var = operands[2];
22419 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
22420 operands[2] = constm1_rtx, op = and_optab;
22421 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
22422 operands[2] = const0_rtx, op = ior_optab;
22423 else
22424 return false;
22425 }
22426 else
22427 return false;
22428
22429 orig_out = operands[0];
22430 tmp = gen_reg_rtx (mode);
22431 operands[0] = tmp;
22432
22433 /* Recurse to get the constant loaded. */
22434 if (!ix86_expand_int_movcc (operands))
22435 return false;
22436
22437 /* Mask in the interesting variable. */
22438 out = expand_binop (mode, op, var, tmp, orig_out, 0,
22439 OPTAB_WIDEN);
22440 if (!rtx_equal_p (out, orig_out))
22441 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
22442
22443 return true;
22444 }
22445
22446 /*
22447 * For comparison with above,
22448 *
22449 * movl cf,dest
22450 * movl ct,tmp
22451 * cmpl op1,op2
22452 * cmovcc tmp,dest
22453 *
22454 * Size 15.
22455 */
22456
22457 if (! nonimmediate_operand (operands[2], mode))
22458 operands[2] = force_reg (mode, operands[2]);
22459 if (! nonimmediate_operand (operands[3], mode))
22460 operands[3] = force_reg (mode, operands[3]);
22461
22462 if (! register_operand (operands[2], VOIDmode)
22463 && (mode == QImode
22464 || ! register_operand (operands[3], VOIDmode)))
22465 operands[2] = force_reg (mode, operands[2]);
22466
22467 if (mode == QImode
22468 && ! register_operand (operands[3], VOIDmode))
22469 operands[3] = force_reg (mode, operands[3]);
22470
22471 emit_insn (compare_seq);
22472 emit_insn (gen_rtx_SET (operands[0],
22473 gen_rtx_IF_THEN_ELSE (mode,
22474 compare_op, operands[2],
22475 operands[3])));
22476 return true;
22477 }
22478
22479 /* Swap, force into registers, or otherwise massage the two operands
22480 to an sse comparison with a mask result. Thus we differ a bit from
22481 ix86_prepare_fp_compare_args which expects to produce a flags result.
22482
22483 The DEST operand exists to help determine whether to commute commutative
22484 operators. The POP0/POP1 operands are updated in place. The new
22485 comparison code is returned, or UNKNOWN if not implementable. */
22486
22487 static enum rtx_code
22488 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
22489 rtx *pop0, rtx *pop1)
22490 {
22491 switch (code)
22492 {
22493 case LTGT:
22494 case UNEQ:
22495 /* AVX supports all the needed comparisons. */
22496 if (TARGET_AVX)
22497 break;
22498 /* We have no LTGT as an operator. We could implement it with
22499 NE & ORDERED, but this requires an extra temporary. It's
22500 not clear that it's worth it. */
22501 return UNKNOWN;
22502
22503 case LT:
22504 case LE:
22505 case UNGT:
22506 case UNGE:
22507 /* These are supported directly. */
22508 break;
22509
22510 case EQ:
22511 case NE:
22512 case UNORDERED:
22513 case ORDERED:
22514 /* AVX has 3 operand comparisons, no need to swap anything. */
22515 if (TARGET_AVX)
22516 break;
22517 /* For commutative operators, try to canonicalize the destination
22518 operand to be first in the comparison - this helps reload to
22519 avoid extra moves. */
22520 if (!dest || !rtx_equal_p (dest, *pop1))
22521 break;
22522 /* FALLTHRU */
22523
22524 case GE:
22525 case GT:
22526 case UNLE:
22527 case UNLT:
22528 /* These are not supported directly before AVX, and furthermore
22529 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
22530 comparison operands to transform into something that is
22531 supported. */
22532 std::swap (*pop0, *pop1);
22533 code = swap_condition (code);
22534 break;
22535
22536 default:
22537 gcc_unreachable ();
22538 }
22539
22540 return code;
22541 }
22542
22543 /* Detect conditional moves that exactly match min/max operational
22544 semantics. Note that this is IEEE safe, as long as we don't
22545 interchange the operands.
22546
22547 Returns FALSE if this conditional move doesn't match a MIN/MAX,
22548 and TRUE if the operation is successful and instructions are emitted. */
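/* A rough sketch of what is matched: "a < b ? a : b" becomes a MIN and
"a < b ? b : a" becomes a MAX (UNGE is handled by first swapping the arms).
Operand order matters because the SSE min/max instructions are not
commutative when NaNs or signed zeros are involved; the IEEE-safe path
below preserves the original order via UNSPEC_IEEE_MIN/MAX. */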
22549
22550 static bool
22551 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
22552 rtx cmp_op1, rtx if_true, rtx if_false)
22553 {
22554 machine_mode mode;
22555 bool is_min;
22556 rtx tmp;
22557
22558 if (code == LT)
22559 ;
22560 else if (code == UNGE)
22561 std::swap (if_true, if_false);
22562 else
22563 return false;
22564
22565 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
22566 is_min = true;
22567 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
22568 is_min = false;
22569 else
22570 return false;
22571
22572 mode = GET_MODE (dest);
22573
22574 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
22575 but MODE may be a vector mode and thus not appropriate. */
22576 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
22577 {
22578 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
22579 rtvec v;
22580
22581 if_true = force_reg (mode, if_true);
22582 v = gen_rtvec (2, if_true, if_false);
22583 tmp = gen_rtx_UNSPEC (mode, v, u);
22584 }
22585 else
22586 {
22587 code = is_min ? SMIN : SMAX;
22588 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
22589 }
22590
22591 emit_insn (gen_rtx_SET (dest, tmp));
22592 return true;
22593 }
22594
22595 /* Expand an sse vector comparison. Return the register with the result. */
22596
22597 static rtx
22598 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
22599 rtx op_true, rtx op_false)
22600 {
22601 machine_mode mode = GET_MODE (dest);
22602 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
22603
22604 /* In the general case the result of the comparison can differ from the operands' type. */
22605 machine_mode cmp_mode;
22606
22607 /* In AVX512F the result of comparison is an integer mask. */
22608 bool maskcmp = false;
22609 rtx x;
22610
22611 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
22612 {
22613 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
22614 gcc_assert (cmp_mode != BLKmode);
22615
22616 maskcmp = true;
22617 }
22618 else
22619 cmp_mode = cmp_ops_mode;
22620
22621
22622 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
22623 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
22624 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
22625
22626 if (optimize
22627 || (op_true && reg_overlap_mentioned_p (dest, op_true))
22628 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
22629 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
22630
22631 /* Compare patterns for int modes are unspec in AVX512F only. */
22632 if (maskcmp && (code == GT || code == EQ))
22633 {
22634 rtx (*gen)(rtx, rtx, rtx);
22635
22636 switch (cmp_ops_mode)
22637 {
22638 case V64QImode:
22639 gcc_assert (TARGET_AVX512BW);
22640 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
22641 break;
22642 case V32HImode:
22643 gcc_assert (TARGET_AVX512BW);
22644 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
22645 break;
22646 case V16SImode:
22647 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
22648 break;
22649 case V8DImode:
22650 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
22651 break;
22652 default:
22653 gen = NULL;
22654 }
22655
22656 if (gen)
22657 {
22658 emit_insn (gen (dest, cmp_op0, cmp_op1));
22659 return dest;
22660 }
22661 }
22662 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
22663
22664 if (cmp_mode != mode && !maskcmp)
22665 {
22666 x = force_reg (cmp_ops_mode, x);
22667 convert_move (dest, x, false);
22668 }
22669 else
22670 emit_insn (gen_rtx_SET (dest, x));
22671
22672 return dest;
22673 }
22674
22675 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
22676 operations. This is used for both scalar and vector conditional moves. */
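/* In the generic case below this computes
dest = (cmp & op_true) | (~cmp & op_false),
relying on CMP being an all-ones/all-zeros mask per element; the SSE4.1
blendv, XOP vpcmov and AVX-512 blendm forms are used instead when
available. */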
22677
22678 void
22679 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
22680 {
22681 machine_mode mode = GET_MODE (dest);
22682 machine_mode cmpmode = GET_MODE (cmp);
22683
22684 /* In AVX512F the result of comparison is an integer mask. */
22685 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
22686
22687 rtx t2, t3, x;
22688
22689 /* If we have an integer mask and an FP value then we need
22690 to cast the mask to the FP mode. */
22691 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
22692 {
22693 cmp = force_reg (cmpmode, cmp);
22694 cmp = gen_rtx_SUBREG (mode, cmp, 0);
22695 }
22696
22697 if (vector_all_ones_operand (op_true, mode)
22698 && rtx_equal_p (op_false, CONST0_RTX (mode))
22699 && !maskcmp)
22700 {
22701 emit_insn (gen_rtx_SET (dest, cmp));
22702 }
22703 else if (op_false == CONST0_RTX (mode)
22704 && !maskcmp)
22705 {
22706 op_true = force_reg (mode, op_true);
22707 x = gen_rtx_AND (mode, cmp, op_true);
22708 emit_insn (gen_rtx_SET (dest, x));
22709 }
22710 else if (op_true == CONST0_RTX (mode)
22711 && !maskcmp)
22712 {
22713 op_false = force_reg (mode, op_false);
22714 x = gen_rtx_NOT (mode, cmp);
22715 x = gen_rtx_AND (mode, x, op_false);
22716 emit_insn (gen_rtx_SET (dest, x));
22717 }
22718 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
22719 && !maskcmp)
22720 {
22721 op_false = force_reg (mode, op_false);
22722 x = gen_rtx_IOR (mode, cmp, op_false);
22723 emit_insn (gen_rtx_SET (dest, x));
22724 }
22725 else if (TARGET_XOP
22726 && !maskcmp)
22727 {
22728 op_true = force_reg (mode, op_true);
22729
22730 if (!nonimmediate_operand (op_false, mode))
22731 op_false = force_reg (mode, op_false);
22732
22733 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
22734 op_true,
22735 op_false)));
22736 }
22737 else
22738 {
22739 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22740 rtx d = dest;
22741
22742 if (!nonimmediate_operand (op_true, mode))
22743 op_true = force_reg (mode, op_true);
22744
22745 op_false = force_reg (mode, op_false);
22746
22747 switch (mode)
22748 {
22749 case V4SFmode:
22750 if (TARGET_SSE4_1)
22751 gen = gen_sse4_1_blendvps;
22752 break;
22753 case V2DFmode:
22754 if (TARGET_SSE4_1)
22755 gen = gen_sse4_1_blendvpd;
22756 break;
22757 case V16QImode:
22758 case V8HImode:
22759 case V4SImode:
22760 case V2DImode:
22761 if (TARGET_SSE4_1)
22762 {
22763 gen = gen_sse4_1_pblendvb;
22764 if (mode != V16QImode)
22765 d = gen_reg_rtx (V16QImode);
22766 op_false = gen_lowpart (V16QImode, op_false);
22767 op_true = gen_lowpart (V16QImode, op_true);
22768 cmp = gen_lowpart (V16QImode, cmp);
22769 }
22770 break;
22771 case V8SFmode:
22772 if (TARGET_AVX)
22773 gen = gen_avx_blendvps256;
22774 break;
22775 case V4DFmode:
22776 if (TARGET_AVX)
22777 gen = gen_avx_blendvpd256;
22778 break;
22779 case V32QImode:
22780 case V16HImode:
22781 case V8SImode:
22782 case V4DImode:
22783 if (TARGET_AVX2)
22784 {
22785 gen = gen_avx2_pblendvb;
22786 if (mode != V32QImode)
22787 d = gen_reg_rtx (V32QImode);
22788 op_false = gen_lowpart (V32QImode, op_false);
22789 op_true = gen_lowpart (V32QImode, op_true);
22790 cmp = gen_lowpart (V32QImode, cmp);
22791 }
22792 break;
22793
22794 case V64QImode:
22795 gen = gen_avx512bw_blendmv64qi;
22796 break;
22797 case V32HImode:
22798 gen = gen_avx512bw_blendmv32hi;
22799 break;
22800 case V16SImode:
22801 gen = gen_avx512f_blendmv16si;
22802 break;
22803 case V8DImode:
22804 gen = gen_avx512f_blendmv8di;
22805 break;
22806 case V8DFmode:
22807 gen = gen_avx512f_blendmv8df;
22808 break;
22809 case V16SFmode:
22810 gen = gen_avx512f_blendmv16sf;
22811 break;
22812
22813 default:
22814 break;
22815 }
22816
22817 if (gen != NULL)
22818 {
22819 emit_insn (gen (d, op_false, op_true, cmp));
22820 if (d != dest)
22821 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
22822 }
22823 else
22824 {
22825 op_true = force_reg (mode, op_true);
22826
22827 t2 = gen_reg_rtx (mode);
22828 if (optimize)
22829 t3 = gen_reg_rtx (mode);
22830 else
22831 t3 = dest;
22832
22833 x = gen_rtx_AND (mode, op_true, cmp);
22834 emit_insn (gen_rtx_SET (t2, x));
22835
22836 x = gen_rtx_NOT (mode, cmp);
22837 x = gen_rtx_AND (mode, x, op_false);
22838 emit_insn (gen_rtx_SET (t3, x));
22839
22840 x = gen_rtx_IOR (mode, t3, t2);
22841 emit_insn (gen_rtx_SET (dest, x));
22842 }
22843 }
22844 }
22845
22846 /* Expand a floating-point conditional move. Return true if successful. */
22847
22848 bool
22849 ix86_expand_fp_movcc (rtx operands[])
22850 {
22851 machine_mode mode = GET_MODE (operands[0]);
22852 enum rtx_code code = GET_CODE (operands[1]);
22853 rtx tmp, compare_op;
22854 rtx op0 = XEXP (operands[1], 0);
22855 rtx op1 = XEXP (operands[1], 1);
22856
22857 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22858 {
22859 machine_mode cmode;
22860
22861 /* Since we've no cmove for sse registers, don't force bad register
22862 allocation just to gain access to it. Deny movcc when the
22863 comparison mode doesn't match the move mode. */
22864 cmode = GET_MODE (op0);
22865 if (cmode == VOIDmode)
22866 cmode = GET_MODE (op1);
22867 if (cmode != mode)
22868 return false;
22869
22870 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
22871 if (code == UNKNOWN)
22872 return false;
22873
22874 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
22875 operands[2], operands[3]))
22876 return true;
22877
22878 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
22879 operands[2], operands[3]);
22880 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
22881 return true;
22882 }
22883
22884 if (GET_MODE (op0) == TImode
22885 || (GET_MODE (op0) == DImode
22886 && !TARGET_64BIT))
22887 return false;
22888
22889 /* The floating point conditional move instructions don't directly
22890 support conditions resulting from a signed integer comparison. */
22891
22892 compare_op = ix86_expand_compare (code, op0, op1);
22893 if (!fcmov_comparison_operator (compare_op, VOIDmode))
22894 {
22895 tmp = gen_reg_rtx (QImode);
22896 ix86_expand_setcc (tmp, code, op0, op1);
22897
22898 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
22899 }
22900
22901 emit_insn (gen_rtx_SET (operands[0],
22902 gen_rtx_IF_THEN_ELSE (mode, compare_op,
22903 operands[2], operands[3])));
22904
22905 return true;
22906 }
22907
22908 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
22909
22910 static int
22911 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
22912 {
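/* These values follow the immediate encoding of the AVX-512 vpcmp
instructions: 0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = not-LT (i.e. GE) and
6 = not-LE (i.e. GT). */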
22913 switch (code)
22914 {
22915 case EQ:
22916 return 0;
22917 case LT:
22918 case LTU:
22919 return 1;
22920 case LE:
22921 case LEU:
22922 return 2;
22923 case NE:
22924 return 4;
22925 case GE:
22926 case GEU:
22927 return 5;
22928 case GT:
22929 case GTU:
22930 return 6;
22931 default:
22932 gcc_unreachable ();
22933 }
22934 }
22935
22936 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
22937
22938 static int
22939 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
22940 {
22941 switch (code)
22942 {
22943 case EQ:
22944 return 0x08;
22945 case NE:
22946 return 0x04;
22947 case GT:
22948 return 0x16;
22949 case LE:
22950 return 0x1a;
22951 case GE:
22952 return 0x15;
22953 case LT:
22954 return 0x19;
22955 default:
22956 gcc_unreachable ();
22957 }
22958 }
22959
22960 /* Return immediate value to be used in UNSPEC_PCMP
22961 for comparison CODE in MODE. */
22962
22963 static int
22964 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
22965 {
22966 if (FLOAT_MODE_P (mode))
22967 return ix86_fp_cmp_code_to_pcmp_immediate (code);
22968 return ix86_int_cmp_code_to_pcmp_immediate (code);
22969 }
22970
22971 /* Expand AVX-512 vector comparison. */
22972
22973 bool
22974 ix86_expand_mask_vec_cmp (rtx operands[])
22975 {
22976 machine_mode mask_mode = GET_MODE (operands[0]);
22977 machine_mode cmp_mode = GET_MODE (operands[2]);
22978 enum rtx_code code = GET_CODE (operands[1]);
22979 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
22980 int unspec_code;
22981 rtx unspec;
22982
22983 switch (code)
22984 {
22985 case LEU:
22986 case GTU:
22987 case GEU:
22988 case LTU:
22989 unspec_code = UNSPEC_UNSIGNED_PCMP;
22990 break;
22991
22992 default:
22993 unspec_code = UNSPEC_PCMP;
22994 }
22995
22996 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
22997 operands[3], imm),
22998 unspec_code);
22999 emit_insn (gen_rtx_SET (operands[0], unspec));
23000
23001 return true;
23002 }
23003
23004 /* Expand fp vector comparison. */
23005
23006 bool
23007 ix86_expand_fp_vec_cmp (rtx operands[])
23008 {
23009 enum rtx_code code = GET_CODE (operands[1]);
23010 rtx cmp;
23011
23012 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23013 &operands[2], &operands[3]);
23014 if (code == UNKNOWN)
23015 {
23016 rtx temp;
23017 switch (GET_CODE (operands[1]))
23018 {
23019 case LTGT:
23020 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23021 operands[3], NULL, NULL);
23022 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23023 operands[3], NULL, NULL);
23024 code = AND;
23025 break;
23026 case UNEQ:
23027 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23028 operands[3], NULL, NULL);
23029 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23030 operands[3], NULL, NULL);
23031 code = IOR;
23032 break;
23033 default:
23034 gcc_unreachable ();
23035 }
23036 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23037 OPTAB_DIRECT);
23038 }
23039 else
23040 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23041 operands[1], operands[2]);
23042
23043 if (operands[0] != cmp)
23044 emit_move_insn (operands[0], cmp);
23045
23046 return true;
23047 }
23048
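/* Helper used by ix86_expand_int_vec_cmp and ix86_expand_int_vcond: expand an
integer vector comparison CODE of COP0 and COP1, producing the result in the
mode of DEST. Return the comparison result (possibly in a fresh register),
or NULL when the comparison cannot be done directly (e.g. V2DImode without
SSE4.1/SSE4.2). *NEGATE is set when the emitted comparison is the inverse of
the requested one; OP_TRUE and OP_FALSE, if given, are swapped here to
compensate. */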
23049 static rtx
23050 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23051 rtx op_true, rtx op_false, bool *negate)
23052 {
23053 machine_mode data_mode = GET_MODE (dest);
23054 machine_mode mode = GET_MODE (cop0);
23055 rtx x;
23056
23057 *negate = false;
23058
23059 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23060 if (TARGET_XOP
23061 && (mode == V16QImode || mode == V8HImode
23062 || mode == V4SImode || mode == V2DImode))
23063 ;
23064 else
23065 {
23066 /* Canonicalize the comparison to EQ, GT, GTU. */
23067 switch (code)
23068 {
23069 case EQ:
23070 case GT:
23071 case GTU:
23072 break;
23073
23074 case NE:
23075 case LE:
23076 case LEU:
23077 code = reverse_condition (code);
23078 *negate = true;
23079 break;
23080
23081 case GE:
23082 case GEU:
23083 code = reverse_condition (code);
23084 *negate = true;
23085 /* FALLTHRU */
23086
23087 case LT:
23088 case LTU:
23089 std::swap (cop0, cop1);
23090 code = swap_condition (code);
23091 break;
23092
23093 default:
23094 gcc_unreachable ();
23095 }
23096
23097 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23098 if (mode == V2DImode)
23099 {
23100 switch (code)
23101 {
23102 case EQ:
23103 /* SSE4.1 supports EQ. */
23104 if (!TARGET_SSE4_1)
23105 return NULL;
23106 break;
23107
23108 case GT:
23109 case GTU:
23110 /* SSE4.2 supports GT/GTU. */
23111 if (!TARGET_SSE4_2)
23112 return NULL;
23113 break;
23114
23115 default:
23116 gcc_unreachable ();
23117 }
23118 }
23119
23120 /* Unsigned parallel compare is not supported by the hardware.
23121 Play some tricks to turn this into a signed comparison
23122 or a comparison against zero. */
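/* Two standard tricks are used below: for 32- and 64-bit elements the
sign-bit mask is subtracted from both operands, flipping their sign bits so
that a >u b becomes (a - bias) >s (b - bias); for 8- and 16-bit elements
a >u b is rewritten as "a minus b with unsigned saturation is nonzero",
i.e. an EQ against zero whose result is then negated. */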
23123 if (code == GTU)
23124 {
23125 cop0 = force_reg (mode, cop0);
23126
23127 switch (mode)
23128 {
23129 case V16SImode:
23130 case V8DImode:
23131 case V8SImode:
23132 case V4DImode:
23133 case V4SImode:
23134 case V2DImode:
23135 {
23136 rtx t1, t2, mask;
23137 rtx (*gen_sub3) (rtx, rtx, rtx);
23138
23139 switch (mode)
23140 {
23141 case V16SImode: gen_sub3 = gen_subv16si3; break;
23142 case V8DImode: gen_sub3 = gen_subv8di3; break;
23143 case V8SImode: gen_sub3 = gen_subv8si3; break;
23144 case V4DImode: gen_sub3 = gen_subv4di3; break;
23145 case V4SImode: gen_sub3 = gen_subv4si3; break;
23146 case V2DImode: gen_sub3 = gen_subv2di3; break;
23147 default:
23148 gcc_unreachable ();
23149 }
23150 /* Subtract (-(INT MAX) - 1) from both operands to make
23151 them signed. */
23152 mask = ix86_build_signbit_mask (mode, true, false);
23153 t1 = gen_reg_rtx (mode);
23154 emit_insn (gen_sub3 (t1, cop0, mask));
23155
23156 t2 = gen_reg_rtx (mode);
23157 emit_insn (gen_sub3 (t2, cop1, mask));
23158
23159 cop0 = t1;
23160 cop1 = t2;
23161 code = GT;
23162 }
23163 break;
23164
23165 case V64QImode:
23166 case V32HImode:
23167 case V32QImode:
23168 case V16HImode:
23169 case V16QImode:
23170 case V8HImode:
23171 /* Perform a parallel unsigned saturating subtraction. */
23172 x = gen_reg_rtx (mode);
23173 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
23174 cop1)));
23175
23176 cop0 = x;
23177 cop1 = CONST0_RTX (mode);
23178 code = EQ;
23179 *negate = !*negate;
23180 break;
23181
23182 default:
23183 gcc_unreachable ();
23184 }
23185 }
23186 }
23187
23188 if (*negate)
23189 std::swap (op_true, op_false);
23190
23191 /* Allow the comparison to be done in one mode, but the movcc to
23192 happen in another mode. */
23193 if (data_mode == mode)
23194 {
23195 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23196 op_true, op_false);
23197 }
23198 else
23199 {
23200 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23201 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23202 op_true, op_false);
23203 if (GET_MODE (x) == mode)
23204 x = gen_lowpart (data_mode, x);
23205 }
23206
23207 return x;
23208 }
23209
23210 /* Expand integer vector comparison. */
23211
23212 bool
23213 ix86_expand_int_vec_cmp (rtx operands[])
23214 {
23215 rtx_code code = GET_CODE (operands[1]);
23216 bool negate = false;
23217 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23218 operands[3], NULL, NULL, &negate);
23219
23220 if (!cmp)
23221 return false;
23222
23223 if (negate)
23224 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23225 CONST0_RTX (GET_MODE (cmp)),
23226 NULL, NULL, &negate);
23227
23228 gcc_assert (!negate);
23229
23230 if (operands[0] != cmp)
23231 emit_move_insn (operands[0], cmp);
23232
23233 return true;
23234 }
23235
23236 /* Expand a floating-point vector conditional move; a vcond operation
23237 rather than a movcc operation. */
23238
23239 bool
23240 ix86_expand_fp_vcond (rtx operands[])
23241 {
23242 enum rtx_code code = GET_CODE (operands[3]);
23243 rtx cmp;
23244
23245 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23246 &operands[4], &operands[5]);
23247 if (code == UNKNOWN)
23248 {
23249 rtx temp;
23250 switch (GET_CODE (operands[3]))
23251 {
23252 case LTGT:
23253 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23254 operands[5], operands[0], operands[0]);
23255 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23256 operands[5], operands[1], operands[2]);
23257 code = AND;
23258 break;
23259 case UNEQ:
23260 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23261 operands[5], operands[0], operands[0]);
23262 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
23263 operands[5], operands[1], operands[2]);
23264 code = IOR;
23265 break;
23266 default:
23267 gcc_unreachable ();
23268 }
23269 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23270 OPTAB_DIRECT);
23271 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23272 return true;
23273 }
23274
23275 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
23276 operands[5], operands[1], operands[2]))
23277 return true;
23278
23279 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
23280 operands[1], operands[2]);
23281 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23282 return true;
23283 }
23284
23285 /* Expand a signed/unsigned integral vector conditional move. */
23286
23287 bool
23288 ix86_expand_int_vcond (rtx operands[])
23289 {
23290 machine_mode data_mode = GET_MODE (operands[0]);
23291 machine_mode mode = GET_MODE (operands[4]);
23292 enum rtx_code code = GET_CODE (operands[3]);
23293 bool negate = false;
23294 rtx x, cop0, cop1;
23295
23296 cop0 = operands[4];
23297 cop1 = operands[5];
23298
23299 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
23300 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
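/* The arithmetic shift broadcasts the sign bit across the element, giving
0 or -1, while the logical shift leaves just the sign bit, giving 0 or 1,
so either arm pair can be produced without a compare. */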
23301 if ((code == LT || code == GE)
23302 && data_mode == mode
23303 && cop1 == CONST0_RTX (mode)
23304 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
23305 && GET_MODE_UNIT_SIZE (data_mode) > 1
23306 && GET_MODE_UNIT_SIZE (data_mode) <= 8
23307 && (GET_MODE_SIZE (data_mode) == 16
23308 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
23309 {
23310 rtx negop = operands[2 - (code == LT)];
23311 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
23312 if (negop == CONST1_RTX (data_mode))
23313 {
23314 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
23315 operands[0], 1, OPTAB_DIRECT);
23316 if (res != operands[0])
23317 emit_move_insn (operands[0], res);
23318 return true;
23319 }
23320 else if (GET_MODE_INNER (data_mode) != DImode
23321 && vector_all_ones_operand (negop, data_mode))
23322 {
23323 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
23324 operands[0], 0, OPTAB_DIRECT);
23325 if (res != operands[0])
23326 emit_move_insn (operands[0], res);
23327 return true;
23328 }
23329 }
23330
23331 if (!nonimmediate_operand (cop1, mode))
23332 cop1 = force_reg (mode, cop1);
23333 if (!general_operand (operands[1], data_mode))
23334 operands[1] = force_reg (data_mode, operands[1]);
23335 if (!general_operand (operands[2], data_mode))
23336 operands[2] = force_reg (data_mode, operands[2]);
23337
23338 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
23339 operands[1], operands[2], &negate);
23340
23341 if (!x)
23342 return false;
23343
23344 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
23345 operands[2-negate]);
23346 return true;
23347 }
23348
23349 /* AVX512F supports 64-byte integer vector operations, so the longest
23350 vector we are faced with is V64QImode. */
23351 #define MAX_VECT_LEN 64
23352
23353 struct expand_vec_perm_d
23354 {
23355 rtx target, op0, op1;
23356 unsigned char perm[MAX_VECT_LEN];
23357 machine_mode vmode;
23358 unsigned char nelt;
23359 bool one_operand_p;
23360 bool testing_p;
23361 };
23362
23363 static bool
23364 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
23365 struct expand_vec_perm_d *d)
23366 {
23367 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23368 expanders, so args are either in d, or in op0, op1 etc. */
23369 machine_mode mode = GET_MODE (d ? d->op0 : op0);
23370 machine_mode maskmode = mode;
23371 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23372
23373 switch (mode)
23374 {
23375 case V8HImode:
23376 if (TARGET_AVX512VL && TARGET_AVX512BW)
23377 gen = gen_avx512vl_vpermi2varv8hi3;
23378 break;
23379 case V16HImode:
23380 if (TARGET_AVX512VL && TARGET_AVX512BW)
23381 gen = gen_avx512vl_vpermi2varv16hi3;
23382 break;
23383 case V64QImode:
23384 if (TARGET_AVX512VBMI)
23385 gen = gen_avx512bw_vpermi2varv64qi3;
23386 break;
23387 case V32HImode:
23388 if (TARGET_AVX512BW)
23389 gen = gen_avx512bw_vpermi2varv32hi3;
23390 break;
23391 case V4SImode:
23392 if (TARGET_AVX512VL)
23393 gen = gen_avx512vl_vpermi2varv4si3;
23394 break;
23395 case V8SImode:
23396 if (TARGET_AVX512VL)
23397 gen = gen_avx512vl_vpermi2varv8si3;
23398 break;
23399 case V16SImode:
23400 if (TARGET_AVX512F)
23401 gen = gen_avx512f_vpermi2varv16si3;
23402 break;
23403 case V4SFmode:
23404 if (TARGET_AVX512VL)
23405 {
23406 gen = gen_avx512vl_vpermi2varv4sf3;
23407 maskmode = V4SImode;
23408 }
23409 break;
23410 case V8SFmode:
23411 if (TARGET_AVX512VL)
23412 {
23413 gen = gen_avx512vl_vpermi2varv8sf3;
23414 maskmode = V8SImode;
23415 }
23416 break;
23417 case V16SFmode:
23418 if (TARGET_AVX512F)
23419 {
23420 gen = gen_avx512f_vpermi2varv16sf3;
23421 maskmode = V16SImode;
23422 }
23423 break;
23424 case V2DImode:
23425 if (TARGET_AVX512VL)
23426 gen = gen_avx512vl_vpermi2varv2di3;
23427 break;
23428 case V4DImode:
23429 if (TARGET_AVX512VL)
23430 gen = gen_avx512vl_vpermi2varv4di3;
23431 break;
23432 case V8DImode:
23433 if (TARGET_AVX512F)
23434 gen = gen_avx512f_vpermi2varv8di3;
23435 break;
23436 case V2DFmode:
23437 if (TARGET_AVX512VL)
23438 {
23439 gen = gen_avx512vl_vpermi2varv2df3;
23440 maskmode = V2DImode;
23441 }
23442 break;
23443 case V4DFmode:
23444 if (TARGET_AVX512VL)
23445 {
23446 gen = gen_avx512vl_vpermi2varv4df3;
23447 maskmode = V4DImode;
23448 }
23449 break;
23450 case V8DFmode:
23451 if (TARGET_AVX512F)
23452 {
23453 gen = gen_avx512f_vpermi2varv8df3;
23454 maskmode = V8DImode;
23455 }
23456 break;
23457 default:
23458 break;
23459 }
23460
23461 if (gen == NULL)
23462 return false;
23463
23464 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23465 expanders, so args are either in d, or in op0, op1 etc. */
23466 if (d)
23467 {
23468 rtx vec[64];
23469 target = d->target;
23470 op0 = d->op0;
23471 op1 = d->op1;
23472 for (int i = 0; i < d->nelt; ++i)
23473 vec[i] = GEN_INT (d->perm[i]);
23474 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
23475 }
23476
23477 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
23478 return true;
23479 }
23480
23481 /* Expand a variable vector permutation. */
23482
23483 void
23484 ix86_expand_vec_perm (rtx operands[])
23485 {
23486 rtx target = operands[0];
23487 rtx op0 = operands[1];
23488 rtx op1 = operands[2];
23489 rtx mask = operands[3];
23490 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
23491 machine_mode mode = GET_MODE (op0);
23492 machine_mode maskmode = GET_MODE (mask);
23493 int w, e, i;
23494 bool one_operand_shuffle = rtx_equal_p (op0, op1);
23495
23496 /* Number of elements in the vector. */
23497 w = GET_MODE_NUNITS (mode);
23498 e = GET_MODE_UNIT_SIZE (mode);
23499 gcc_assert (w <= 64);
23500
23501 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
23502 return;
23503
23504 if (TARGET_AVX2)
23505 {
23506 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
23507 {
23508 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
23509 a constant shuffle operand. With a tiny bit of effort we can
23510 use VPERMD instead. A re-interpretation stall for V4DFmode is
23511 unfortunate but there's no avoiding it.
23512 Similarly for V16HImode we don't have instructions for variable
23513 shuffling, while for V32QImode we can use
23514 vpshufb; vpshufb; vpermq; vpor after preparing suitable masks. */
23515
23516 if (mode == V16HImode)
23517 {
23518 maskmode = mode = V32QImode;
23519 w = 32;
23520 e = 1;
23521 }
23522 else
23523 {
23524 maskmode = mode = V8SImode;
23525 w = 8;
23526 e = 4;
23527 }
23528 t1 = gen_reg_rtx (maskmode);
23529
23530 /* Replicate the low bits of the V4DImode mask into V8SImode:
23531 mask = { A B C D }
23532 t1 = { A A B B C C D D }. */
23533 for (i = 0; i < w / 2; ++i)
23534 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
23535 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23536 vt = force_reg (maskmode, vt);
23537 mask = gen_lowpart (maskmode, mask);
23538 if (maskmode == V8SImode)
23539 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
23540 else
23541 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
23542
23543 /* Multiply the shuffle indices by two. */
23544 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
23545 OPTAB_DIRECT);
23546
23547 /* Add one to the odd shuffle indices:
23548 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
23549 for (i = 0; i < w / 2; ++i)
23550 {
23551 vec[i * 2] = const0_rtx;
23552 vec[i * 2 + 1] = const1_rtx;
23553 }
23554 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23555 vt = validize_mem (force_const_mem (maskmode, vt));
23556 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
23557 OPTAB_DIRECT);
23558
23559 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
23560 operands[3] = mask = t1;
23561 target = gen_reg_rtx (mode);
23562 op0 = gen_lowpart (mode, op0);
23563 op1 = gen_lowpart (mode, op1);
23564 }
23565
23566 switch (mode)
23567 {
23568 case V8SImode:
23569 /* The VPERMD and VPERMPS instructions already properly ignore
23570 the high bits of the shuffle elements. No need for us to
23571 perform an AND ourselves. */
23572 if (one_operand_shuffle)
23573 {
23574 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
23575 if (target != operands[0])
23576 emit_move_insn (operands[0],
23577 gen_lowpart (GET_MODE (operands[0]), target));
23578 }
23579 else
23580 {
23581 t1 = gen_reg_rtx (V8SImode);
23582 t2 = gen_reg_rtx (V8SImode);
23583 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
23584 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
23585 goto merge_two;
23586 }
23587 return;
23588
23589 case V8SFmode:
23590 mask = gen_lowpart (V8SImode, mask);
23591 if (one_operand_shuffle)
23592 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
23593 else
23594 {
23595 t1 = gen_reg_rtx (V8SFmode);
23596 t2 = gen_reg_rtx (V8SFmode);
23597 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
23598 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
23599 goto merge_two;
23600 }
23601 return;
23602
23603 case V4SImode:
23604 /* By combining the two 128-bit input vectors into one 256-bit
23605 input vector, we can use VPERMD and VPERMPS for the full
23606 two-operand shuffle. */
23607 t1 = gen_reg_rtx (V8SImode);
23608 t2 = gen_reg_rtx (V8SImode);
23609 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
23610 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23611 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
23612 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
23613 return;
23614
23615 case V4SFmode:
23616 t1 = gen_reg_rtx (V8SFmode);
23617 t2 = gen_reg_rtx (V8SImode);
23618 mask = gen_lowpart (V4SImode, mask);
23619 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
23620 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23621 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
23622 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
23623 return;
23624
23625 case V32QImode:
23626 t1 = gen_reg_rtx (V32QImode);
23627 t2 = gen_reg_rtx (V32QImode);
23628 t3 = gen_reg_rtx (V32QImode);
23629 vt2 = GEN_INT (-128);
23630 for (i = 0; i < 32; i++)
23631 vec[i] = vt2;
23632 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23633 vt = force_reg (V32QImode, vt);
23634 for (i = 0; i < 32; i++)
23635 vec[i] = i < 16 ? vt2 : const0_rtx;
23636 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23637 vt2 = force_reg (V32QImode, vt2);
23638 /* From mask create two adjusted masks, which contain the same
23639 bits as mask in the low 7 bits of each vector element.
23640 The first mask will have the most significant bit clear
23641 if it requests element from the same 128-bit lane
23642 and MSB set if it requests element from the other 128-bit lane.
23643 The second mask will have the opposite values of the MSB,
23644 and additionally will have its 128-bit lanes swapped.
23645 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
23646 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
23647 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
23648 stands for the other 12 bytes. */
23649 /* The bit that tells whether an element is from the same lane or the
23650 other lane is bit 4, so shift it up by 3 to the MSB position. */
23651 t5 = gen_reg_rtx (V4DImode);
23652 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
23653 GEN_INT (3)));
23654 /* Clear MSB bits from the mask just in case it had them set. */
23655 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
23656 /* After this t1 will have MSB set for elements from the other lane. */
23657 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
23658 /* Clear bits other than MSB. */
23659 emit_insn (gen_andv32qi3 (t1, t1, vt));
23660 /* Or in the lower bits from mask into t3. */
23661 emit_insn (gen_iorv32qi3 (t3, t1, t2));
23662 /* And invert MSB bits in t1, so MSB is set for elements from the same
23663 lane. */
23664 emit_insn (gen_xorv32qi3 (t1, t1, vt));
23665 /* Swap 128-bit lanes in t3. */
23666 t6 = gen_reg_rtx (V4DImode);
23667 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
23668 const2_rtx, GEN_INT (3),
23669 const0_rtx, const1_rtx));
23670 /* And or in the lower bits from mask into t1. */
23671 emit_insn (gen_iorv32qi3 (t1, t1, t2));
23672 if (one_operand_shuffle)
23673 {
23674 /* Each of these shuffles will put 0s in places where an
23675 element from the other 128-bit lane is needed, otherwise
23676 it will shuffle in the requested value. */
23677 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
23678 gen_lowpart (V32QImode, t6)));
23679 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
23680 /* For t3 the 128-bit lanes are swapped again. */
23681 t7 = gen_reg_rtx (V4DImode);
23682 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
23683 const2_rtx, GEN_INT (3),
23684 const0_rtx, const1_rtx));
23685 /* And oring both together leads to the result. */
23686 emit_insn (gen_iorv32qi3 (target, t1,
23687 gen_lowpart (V32QImode, t7)));
23688 if (target != operands[0])
23689 emit_move_insn (operands[0],
23690 gen_lowpart (GET_MODE (operands[0]), target));
23691 return;
23692 }
23693
23694 t4 = gen_reg_rtx (V32QImode);
23695 /* Similarly to the one_operand_shuffle code above, just repeated
23696 twice, once for each operand. The merge_two: code below will
23697 merge the two results together. */
23698 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
23699 gen_lowpart (V32QImode, t6)));
23700 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
23701 gen_lowpart (V32QImode, t6)));
23702 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
23703 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
23704 t7 = gen_reg_rtx (V4DImode);
23705 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
23706 const2_rtx, GEN_INT (3),
23707 const0_rtx, const1_rtx));
23708 t8 = gen_reg_rtx (V4DImode);
23709 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
23710 const2_rtx, GEN_INT (3),
23711 const0_rtx, const1_rtx));
23712 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
23713 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
23714 t1 = t4;
23715 t2 = t3;
23716 goto merge_two;
23717
23718 default:
23719 gcc_assert (GET_MODE_SIZE (mode) <= 16);
23720 break;
23721 }
23722 }
23723
23724 if (TARGET_XOP)
23725 {
23726 /* The XOP VPPERM insn supports three inputs. By ignoring the
23727 one_operand_shuffle special case, we avoid creating another
23728 set of constant vectors in memory. */
23729 one_operand_shuffle = false;
23730
23731 /* mask = mask & {2*w-1, ...} */
23732 vt = GEN_INT (2*w - 1);
23733 }
23734 else
23735 {
23736 /* mask = mask & {w-1, ...} */
23737 vt = GEN_INT (w - 1);
23738 }
23739
23740 for (i = 0; i < w; i++)
23741 vec[i] = vt;
23742 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23743 mask = expand_simple_binop (maskmode, AND, mask, vt,
23744 NULL_RTX, 0, OPTAB_DIRECT);
23745
23746 /* For non-QImode operations, convert the word permutation control
23747 into a byte permutation control. */
23748 if (mode != V16QImode)
23749 {
23750 mask = expand_simple_binop (maskmode, ASHIFT, mask,
23751 GEN_INT (exact_log2 (e)),
23752 NULL_RTX, 0, OPTAB_DIRECT);
23753
23754 /* Convert mask to vector of chars. */
23755 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
23756
23757 /* Replicate each of the input bytes into byte positions:
23758 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
23759 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
23760 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
23761 for (i = 0; i < 16; ++i)
23762 vec[i] = GEN_INT (i/e * e);
23763 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23764 vt = validize_mem (force_const_mem (V16QImode, vt));
23765 if (TARGET_XOP)
23766 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
23767 else
23768 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
23769
23770 /* Convert it into the byte positions by doing
23771 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
23772 for (i = 0; i < 16; ++i)
23773 vec[i] = GEN_INT (i % e);
23774 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23775 vt = validize_mem (force_const_mem (V16QImode, vt));
23776 emit_insn (gen_addv16qi3 (mask, mask, vt));
23777 }
23778
23779 /* The actual shuffle operations all operate on V16QImode. */
23780 op0 = gen_lowpart (V16QImode, op0);
23781 op1 = gen_lowpart (V16QImode, op1);
23782
23783 if (TARGET_XOP)
23784 {
23785 if (GET_MODE (target) != V16QImode)
23786 target = gen_reg_rtx (V16QImode);
23787 emit_insn (gen_xop_pperm (target, op0, op1, mask));
23788 if (target != operands[0])
23789 emit_move_insn (operands[0],
23790 gen_lowpart (GET_MODE (operands[0]), target));
23791 }
23792 else if (one_operand_shuffle)
23793 {
23794 if (GET_MODE (target) != V16QImode)
23795 target = gen_reg_rtx (V16QImode);
23796 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
23797 if (target != operands[0])
23798 emit_move_insn (operands[0],
23799 gen_lowpart (GET_MODE (operands[0]), target));
23800 }
23801 else
23802 {
23803 rtx xops[6];
23804 bool ok;
23805
23806 /* Shuffle the two input vectors independently. */
23807 t1 = gen_reg_rtx (V16QImode);
23808 t2 = gen_reg_rtx (V16QImode);
23809 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
23810 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
23811
23812 merge_two:
23813 /* Then merge them together. The key is whether any given control
23814 element contained a bit set that indicates the second word. */
23815 mask = operands[3];
23816 vt = GEN_INT (w);
23817 if (maskmode == V2DImode && !TARGET_SSE4_1)
23818 {
23819 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
23820 more shuffle to convert the V2DI input mask into a V4SI
23821 input mask. At that point the masking that expand_int_vcond
23822 performs will work as desired. */
23823 rtx t3 = gen_reg_rtx (V4SImode);
23824 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
23825 const0_rtx, const0_rtx,
23826 const2_rtx, const2_rtx));
23827 mask = t3;
23828 maskmode = V4SImode;
23829 e = w = 4;
23830 }
23831
23832 for (i = 0; i < w; i++)
23833 vec[i] = vt;
23834 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23835 vt = force_reg (maskmode, vt);
23836 mask = expand_simple_binop (maskmode, AND, mask, vt,
23837 NULL_RTX, 0, OPTAB_DIRECT);
23838
23839 if (GET_MODE (target) != mode)
23840 target = gen_reg_rtx (mode);
23841 xops[0] = target;
23842 xops[1] = gen_lowpart (mode, t2);
23843 xops[2] = gen_lowpart (mode, t1);
23844 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
23845 xops[4] = mask;
23846 xops[5] = vt;
23847 ok = ix86_expand_int_vcond (xops);
23848 gcc_assert (ok);
23849 if (target != operands[0])
23850 emit_move_insn (operands[0],
23851 gen_lowpart (GET_MODE (operands[0]), target));
23852 }
23853 }
23854
23855 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
23856 true if we should do zero extension, else sign extension. HIGH_P is
23857 true if we want the N/2 high elements, else the low elements. */
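/* For example (a sketch of typical output): with SSE4.1, unpacking the
   low half of a signed V16QImode SRC expands to a single pmovsxbw,
   while for the high half the upper 8 bytes are first shifted down
   (psrldq $8) and then extended the same way.  */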
23858
23859 void
23860 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
23861 {
23862 machine_mode imode = GET_MODE (src);
23863 rtx tmp;
23864
23865 if (TARGET_SSE4_1)
23866 {
23867 rtx (*unpack)(rtx, rtx);
23868 rtx (*extract)(rtx, rtx) = NULL;
23869 machine_mode halfmode = BLKmode;
23870
23871 switch (imode)
23872 {
23873 case V64QImode:
23874 if (unsigned_p)
23875 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
23876 else
23877 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
23878 halfmode = V32QImode;
23879 extract
23880 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
23881 break;
23882 case V32QImode:
23883 if (unsigned_p)
23884 unpack = gen_avx2_zero_extendv16qiv16hi2;
23885 else
23886 unpack = gen_avx2_sign_extendv16qiv16hi2;
23887 halfmode = V16QImode;
23888 extract
23889 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
23890 break;
23891 case V32HImode:
23892 if (unsigned_p)
23893 unpack = gen_avx512f_zero_extendv16hiv16si2;
23894 else
23895 unpack = gen_avx512f_sign_extendv16hiv16si2;
23896 halfmode = V16HImode;
23897 extract
23898 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
23899 break;
23900 case V16HImode:
23901 if (unsigned_p)
23902 unpack = gen_avx2_zero_extendv8hiv8si2;
23903 else
23904 unpack = gen_avx2_sign_extendv8hiv8si2;
23905 halfmode = V8HImode;
23906 extract
23907 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
23908 break;
23909 case V16SImode:
23910 if (unsigned_p)
23911 unpack = gen_avx512f_zero_extendv8siv8di2;
23912 else
23913 unpack = gen_avx512f_sign_extendv8siv8di2;
23914 halfmode = V8SImode;
23915 extract
23916 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
23917 break;
23918 case V8SImode:
23919 if (unsigned_p)
23920 unpack = gen_avx2_zero_extendv4siv4di2;
23921 else
23922 unpack = gen_avx2_sign_extendv4siv4di2;
23923 halfmode = V4SImode;
23924 extract
23925 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
23926 break;
23927 case V16QImode:
23928 if (unsigned_p)
23929 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
23930 else
23931 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
23932 break;
23933 case V8HImode:
23934 if (unsigned_p)
23935 unpack = gen_sse4_1_zero_extendv4hiv4si2;
23936 else
23937 unpack = gen_sse4_1_sign_extendv4hiv4si2;
23938 break;
23939 case V4SImode:
23940 if (unsigned_p)
23941 unpack = gen_sse4_1_zero_extendv2siv2di2;
23942 else
23943 unpack = gen_sse4_1_sign_extendv2siv2di2;
23944 break;
23945 default:
23946 gcc_unreachable ();
23947 }
23948
23949 if (GET_MODE_SIZE (imode) >= 32)
23950 {
23951 tmp = gen_reg_rtx (halfmode);
23952 emit_insn (extract (tmp, src));
23953 }
23954 else if (high_p)
23955 {
23956 /* Shift higher 8 bytes to lower 8 bytes. */
23957 tmp = gen_reg_rtx (V1TImode);
23958 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
23959 GEN_INT (64)));
23960 tmp = gen_lowpart (imode, tmp);
23961 }
23962 else
23963 tmp = src;
23964
23965 emit_insn (unpack (dest, tmp));
23966 }
23967 else
23968 {
23969 rtx (*unpack)(rtx, rtx, rtx);
23970
23971 switch (imode)
23972 {
23973 case V16QImode:
23974 if (high_p)
23975 unpack = gen_vec_interleave_highv16qi;
23976 else
23977 unpack = gen_vec_interleave_lowv16qi;
23978 break;
23979 case V8HImode:
23980 if (high_p)
23981 unpack = gen_vec_interleave_highv8hi;
23982 else
23983 unpack = gen_vec_interleave_lowv8hi;
23984 break;
23985 case V4SImode:
23986 if (high_p)
23987 unpack = gen_vec_interleave_highv4si;
23988 else
23989 unpack = gen_vec_interleave_lowv4si;
23990 break;
23991 default:
23992 gcc_unreachable ();
23993 }
23994
23995 if (unsigned_p)
23996 tmp = force_reg (imode, CONST0_RTX (imode));
23997 else
23998 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
23999 src, pc_rtx, pc_rtx);
24000
24001 rtx tmp2 = gen_reg_rtx (imode);
24002 emit_insn (unpack (tmp2, src, tmp));
24003 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
24004 }
24005 }
24006
24007 /* Expand conditional increment or decrement using adc/sbb instructions.
24008 The default case using setcc followed by the conditional move can be
24009 done by generic code. */
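/* A sketch of the intended transformation (Intel syntax, illustrative
   only): for an unsigned comparison,
     x = (a < b) ? x + 1 : x;
   can be emitted as
     cmp a, b   ; carry flag = (a < b) unsigned
     adc x, 0   ; add 1 exactly when the carry is set
   instead of a setcc followed by a conditional move.  */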
24010 bool
24011 ix86_expand_int_addcc (rtx operands[])
24012 {
24013 enum rtx_code code = GET_CODE (operands[1]);
24014 rtx flags;
24015 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24016 rtx compare_op;
24017 rtx val = const0_rtx;
24018 bool fpcmp = false;
24019 machine_mode mode;
24020 rtx op0 = XEXP (operands[1], 0);
24021 rtx op1 = XEXP (operands[1], 1);
24022
24023 if (operands[3] != const1_rtx
24024 && operands[3] != constm1_rtx)
24025 return false;
24026 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24027 return false;
24028 code = GET_CODE (compare_op);
24029
24030 flags = XEXP (compare_op, 0);
24031
24032 if (GET_MODE (flags) == CCFPmode
24033 || GET_MODE (flags) == CCFPUmode)
24034 {
24035 fpcmp = true;
24036 code = ix86_fp_compare_code_to_integer (code);
24037 }
24038
24039 if (code != LTU)
24040 {
24041 val = constm1_rtx;
24042 if (fpcmp)
24043 PUT_CODE (compare_op,
24044 reverse_condition_maybe_unordered
24045 (GET_CODE (compare_op)));
24046 else
24047 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24048 }
24049
24050 mode = GET_MODE (operands[0]);
24051
24052 /* Construct either adc or sbb insn. */
24053 if ((code == LTU) == (operands[3] == constm1_rtx))
24054 {
24055 switch (mode)
24056 {
24057 case QImode:
24058 insn = gen_subqi3_carry;
24059 break;
24060 case HImode:
24061 insn = gen_subhi3_carry;
24062 break;
24063 case SImode:
24064 insn = gen_subsi3_carry;
24065 break;
24066 case DImode:
24067 insn = gen_subdi3_carry;
24068 break;
24069 default:
24070 gcc_unreachable ();
24071 }
24072 }
24073 else
24074 {
24075 switch (mode)
24076 {
24077 case QImode:
24078 insn = gen_addqi3_carry;
24079 break;
24080 case HImode:
24081 insn = gen_addhi3_carry;
24082 break;
24083 case SImode:
24084 insn = gen_addsi3_carry;
24085 break;
24086 case DImode:
24087 insn = gen_adddi3_carry;
24088 break;
24089 default:
24090 gcc_unreachable ();
24091 }
24092 }
24093 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24094
24095 return true;
24096 }
24097
24098
24099 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24100 but works for floating point parameters and non-offsettable memories.
24101 For pushes, it returns just stack offsets; the values will be saved
24102 in the right order. At most four parts are generated. */
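/* For example (illustrative): on a 32-bit target a DFmode operand is
   split into two SImode parts and an XFmode operand into three, while
   on a 64-bit target an XFmode operand becomes one DImode part plus an
   SImode part holding the upper bits.  */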
24103
24104 static int
24105 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
24106 {
24107 int size;
24108
24109 if (!TARGET_64BIT)
24110 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24111 else
24112 size = (GET_MODE_SIZE (mode) + 4) / 8;
24113
24114 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24115 gcc_assert (size >= 2 && size <= 4);
24116
24117 /* Optimize constant pool reference to immediates. This is used by fp
24118 moves, which force all constants to memory to allow combining. */
24119 if (MEM_P (operand) && MEM_READONLY_P (operand))
24120 {
24121 rtx tmp = maybe_get_pool_constant (operand);
24122 if (tmp)
24123 operand = tmp;
24124 }
24125
24126 if (MEM_P (operand) && !offsettable_memref_p (operand))
24127 {
24128 /* The only non-offsettable memories we handle are pushes. */
24129 int ok = push_operand (operand, VOIDmode);
24130
24131 gcc_assert (ok);
24132
24133 operand = copy_rtx (operand);
24134 PUT_MODE (operand, word_mode);
24135 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24136 return size;
24137 }
24138
24139 if (GET_CODE (operand) == CONST_VECTOR)
24140 {
24141 machine_mode imode = int_mode_for_mode (mode);
24142 /* Caution: if we looked through a constant pool memory above,
24143 the operand may actually have a different mode now. That's
24144 ok, since we want to pun this all the way back to an integer. */
24145 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24146 gcc_assert (operand != NULL);
24147 mode = imode;
24148 }
24149
24150 if (!TARGET_64BIT)
24151 {
24152 if (mode == DImode)
24153 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24154 else
24155 {
24156 int i;
24157
24158 if (REG_P (operand))
24159 {
24160 gcc_assert (reload_completed);
24161 for (i = 0; i < size; i++)
24162 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24163 }
24164 else if (offsettable_memref_p (operand))
24165 {
24166 operand = adjust_address (operand, SImode, 0);
24167 parts[0] = operand;
24168 for (i = 1; i < size; i++)
24169 parts[i] = adjust_address (operand, SImode, 4 * i);
24170 }
24171 else if (CONST_DOUBLE_P (operand))
24172 {
24173 const REAL_VALUE_TYPE *r;
24174 long l[4];
24175
24176 r = CONST_DOUBLE_REAL_VALUE (operand);
24177 switch (mode)
24178 {
24179 case TFmode:
24180 real_to_target (l, r, mode);
24181 parts[3] = gen_int_mode (l[3], SImode);
24182 parts[2] = gen_int_mode (l[2], SImode);
24183 break;
24184 case XFmode:
24185 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24186 long double may not be 80-bit. */
24187 real_to_target (l, r, mode);
24188 parts[2] = gen_int_mode (l[2], SImode);
24189 break;
24190 case DFmode:
24191 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24192 break;
24193 default:
24194 gcc_unreachable ();
24195 }
24196 parts[1] = gen_int_mode (l[1], SImode);
24197 parts[0] = gen_int_mode (l[0], SImode);
24198 }
24199 else
24200 gcc_unreachable ();
24201 }
24202 }
24203 else
24204 {
24205 if (mode == TImode)
24206 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24207 if (mode == XFmode || mode == TFmode)
24208 {
24209 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
24210 if (REG_P (operand))
24211 {
24212 gcc_assert (reload_completed);
24213 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24214 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24215 }
24216 else if (offsettable_memref_p (operand))
24217 {
24218 operand = adjust_address (operand, DImode, 0);
24219 parts[0] = operand;
24220 parts[1] = adjust_address (operand, upper_mode, 8);
24221 }
24222 else if (CONST_DOUBLE_P (operand))
24223 {
24224 long l[4];
24225
24226 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24227
24228 /* real_to_target puts 32-bit pieces in each long. */
24229 parts[0] =
24230 gen_int_mode
24231 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
24232 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
24233 DImode);
24234
24235 if (upper_mode == SImode)
24236 parts[1] = gen_int_mode (l[2], SImode);
24237 else
24238 parts[1] =
24239 gen_int_mode
24240 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
24241 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
24242 DImode);
24243 }
24244 else
24245 gcc_unreachable ();
24246 }
24247 }
24248
24249 return size;
24250 }
24251
24252 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
24253 All required insns are emitted by this function itself. Operands 2
24254 onwards receive the destination parts and operands 6 onwards the
24255 corresponding source parts, in the correct order. */
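/* As an illustration (not a literal template): on a 32-bit target a
   DImode memory-to-register move is split into two SImode moves of the
   low and high words, with the copy order chosen so that a destination
   register that also appears in the source address is written last.  */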
24256
24257 void
24258 ix86_split_long_move (rtx operands[])
24259 {
24260 rtx part[2][4];
24261 int nparts, i, j;
24262 int push = 0;
24263 int collisions = 0;
24264 machine_mode mode = GET_MODE (operands[0]);
24265 bool collisionparts[4];
24266
24267 /* The DFmode expanders may ask us to move a double.
24268 For a 64bit target this is a single move. By hiding that fact
24269 here we simplify the i386.md splitters. */
24270 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
24271 {
24272 /* Optimize constant pool reference to immediates. This is used by
24273 fp moves, which force all constants to memory to allow combining. */
24274
24275 if (MEM_P (operands[1])
24276 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
24277 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
24278 operands[1] = get_pool_constant (XEXP (operands[1], 0));
24279 if (push_operand (operands[0], VOIDmode))
24280 {
24281 operands[0] = copy_rtx (operands[0]);
24282 PUT_MODE (operands[0], word_mode);
24283 }
24284 else
24285 operands[0] = gen_lowpart (DImode, operands[0]);
24286 operands[1] = gen_lowpart (DImode, operands[1]);
24287 emit_move_insn (operands[0], operands[1]);
24288 return;
24289 }
24290
24291 /* The only non-offsettable memory we handle is push. */
24292 if (push_operand (operands[0], VOIDmode))
24293 push = 1;
24294 else
24295 gcc_assert (!MEM_P (operands[0])
24296 || offsettable_memref_p (operands[0]));
24297
24298 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24299 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24300
24301 /* When emitting push, take care for source operands on the stack. */
24302 if (push && MEM_P (operands[1])
24303 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24304 {
24305 rtx src_base = XEXP (part[1][nparts - 1], 0);
24306
24307 /* Compensate for the stack decrement by 4. */
24308 if (!TARGET_64BIT && nparts == 3
24309 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
24310 src_base = plus_constant (Pmode, src_base, 4);
24311
24312 /* src_base refers to the stack pointer and is
24313 automatically decreased by emitted push. */
24314 for (i = 0; i < nparts; i++)
24315 part[1][i] = change_address (part[1][i],
24316 GET_MODE (part[1][i]), src_base);
24317 }
24318
24319 /* We need to do the copy in the right order in case an address register
24320 of the source overlaps the destination. */
24321 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
24322 {
24323 rtx tmp;
24324
24325 for (i = 0; i < nparts; i++)
24326 {
24327 collisionparts[i]
24328 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
24329 if (collisionparts[i])
24330 collisions++;
24331 }
24332
24333 /* Collision in the middle part can be handled by reordering. */
24334 if (collisions == 1 && nparts == 3 && collisionparts [1])
24335 {
24336 std::swap (part[0][1], part[0][2]);
24337 std::swap (part[1][1], part[1][2]);
24338 }
24339 else if (collisions == 1
24340 && nparts == 4
24341 && (collisionparts [1] || collisionparts [2]))
24342 {
24343 if (collisionparts [1])
24344 {
24345 std::swap (part[0][1], part[0][2]);
24346 std::swap (part[1][1], part[1][2]);
24347 }
24348 else
24349 {
24350 std::swap (part[0][2], part[0][3]);
24351 std::swap (part[1][2], part[1][3]);
24352 }
24353 }
24354
24355 /* If there are more collisions, we can't handle it by reordering.
24356 Do an lea to the last part and use only one colliding move. */
24357 else if (collisions > 1)
24358 {
24359 rtx base, addr, tls_base = NULL_RTX;
24360
24361 collisions = 1;
24362
24363 base = part[0][nparts - 1];
24364
24365 /* Handle the case when the last part isn't valid for lea.
24366 Happens in 64-bit mode storing the 12-byte XFmode. */
24367 if (GET_MODE (base) != Pmode)
24368 base = gen_rtx_REG (Pmode, REGNO (base));
24369
24370 addr = XEXP (part[1][0], 0);
24371 if (TARGET_TLS_DIRECT_SEG_REFS)
24372 {
24373 struct ix86_address parts;
24374 int ok = ix86_decompose_address (addr, &parts);
24375 gcc_assert (ok);
24376 if (parts.seg == DEFAULT_TLS_SEG_REG)
24377 {
24378 /* It is not valid to use %gs: or %fs: in
24379 lea though, so we need to remove it from the
24380 address used for lea and add it to each individual
24381 memory load instead. */
24382 addr = copy_rtx (addr);
24383 rtx *x = &addr;
24384 while (GET_CODE (*x) == PLUS)
24385 {
24386 for (i = 0; i < 2; i++)
24387 {
24388 rtx u = XEXP (*x, i);
24389 if (GET_CODE (u) == ZERO_EXTEND)
24390 u = XEXP (u, 0);
24391 if (GET_CODE (u) == UNSPEC
24392 && XINT (u, 1) == UNSPEC_TP)
24393 {
24394 tls_base = XEXP (*x, i);
24395 *x = XEXP (*x, 1 - i);
24396 break;
24397 }
24398 }
24399 if (tls_base)
24400 break;
24401 x = &XEXP (*x, 0);
24402 }
24403 gcc_assert (tls_base);
24404 }
24405 }
24406 emit_insn (gen_rtx_SET (base, addr));
24407 if (tls_base)
24408 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
24409 part[1][0] = replace_equiv_address (part[1][0], base);
24410 for (i = 1; i < nparts; i++)
24411 {
24412 if (tls_base)
24413 base = copy_rtx (base);
24414 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
24415 part[1][i] = replace_equiv_address (part[1][i], tmp);
24416 }
24417 }
24418 }
24419
24420 if (push)
24421 {
24422 if (!TARGET_64BIT)
24423 {
24424 if (nparts == 3)
24425 {
24426 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
24427 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
24428 stack_pointer_rtx, GEN_INT (-4)));
24429 emit_move_insn (part[0][2], part[1][2]);
24430 }
24431 else if (nparts == 4)
24432 {
24433 emit_move_insn (part[0][3], part[1][3]);
24434 emit_move_insn (part[0][2], part[1][2]);
24435 }
24436 }
24437 else
24438 {
24439 /* In 64bit mode we don't have a 32bit push available. In case this is
24440 a register, it is OK - we will just use the larger counterpart. We also
24441 retype memory - these come from an attempt to avoid a REX prefix on
24442 moving the second half of a TFmode value. */
24443 if (GET_MODE (part[1][1]) == SImode)
24444 {
24445 switch (GET_CODE (part[1][1]))
24446 {
24447 case MEM:
24448 part[1][1] = adjust_address (part[1][1], DImode, 0);
24449 break;
24450
24451 case REG:
24452 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
24453 break;
24454
24455 default:
24456 gcc_unreachable ();
24457 }
24458
24459 if (GET_MODE (part[1][0]) == SImode)
24460 part[1][0] = part[1][1];
24461 }
24462 }
24463 emit_move_insn (part[0][1], part[1][1]);
24464 emit_move_insn (part[0][0], part[1][0]);
24465 return;
24466 }
24467
24468 /* Choose correct order to not overwrite the source before it is copied. */
24469 if ((REG_P (part[0][0])
24470 && REG_P (part[1][1])
24471 && (REGNO (part[0][0]) == REGNO (part[1][1])
24472 || (nparts == 3
24473 && REGNO (part[0][0]) == REGNO (part[1][2]))
24474 || (nparts == 4
24475 && REGNO (part[0][0]) == REGNO (part[1][3]))))
24476 || (collisions > 0
24477 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
24478 {
24479 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
24480 {
24481 operands[2 + i] = part[0][j];
24482 operands[6 + i] = part[1][j];
24483 }
24484 }
24485 else
24486 {
24487 for (i = 0; i < nparts; i++)
24488 {
24489 operands[2 + i] = part[0][i];
24490 operands[6 + i] = part[1][i];
24491 }
24492 }
24493
24494 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
24495 if (optimize_insn_for_size_p ())
24496 {
24497 for (j = 0; j < nparts - 1; j++)
24498 if (CONST_INT_P (operands[6 + j])
24499 && operands[6 + j] != const0_rtx
24500 && REG_P (operands[2 + j]))
24501 for (i = j; i < nparts - 1; i++)
24502 if (CONST_INT_P (operands[7 + i])
24503 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
24504 operands[7 + i] = operands[2 + j];
24505 }
24506
24507 for (i = 0; i < nparts; i++)
24508 emit_move_insn (operands[2 + i], operands[6 + i]);
24509
24510 return;
24511 }
24512
24513 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
24514 left shift by a constant, either using a single shift or
24515 a sequence of add instructions. */
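/* For instance (a sketch): on a target where two adds are cheaper than
   a constant shift, "reg <<= 2" is emitted as
     add reg, reg
     add reg, reg
   while larger counts, or size-optimized code, fall back to a single
   shift instruction.  */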
24516
24517 static void
24518 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
24519 {
24520 rtx (*insn)(rtx, rtx, rtx);
24521
24522 if (count == 1
24523 || (count * ix86_cost->add <= ix86_cost->shift_const
24524 && !optimize_insn_for_size_p ()))
24525 {
24526 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
24527 while (count-- > 0)
24528 emit_insn (insn (operand, operand, operand));
24529 }
24530 else
24531 {
24532 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24533 emit_insn (insn (operand, operand, GEN_INT (count)));
24534 }
24535 }
24536
24537 void
24538 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
24539 {
24540 rtx (*gen_ashl3)(rtx, rtx, rtx);
24541 rtx (*gen_shld)(rtx, rtx, rtx);
24542 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24543
24544 rtx low[2], high[2];
24545 int count;
24546
24547 if (CONST_INT_P (operands[2]))
24548 {
24549 split_double_mode (mode, operands, 2, low, high);
24550 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
24551
24552 if (count >= half_width)
24553 {
24554 emit_move_insn (high[0], low[1]);
24555 emit_move_insn (low[0], const0_rtx);
24556
24557 if (count > half_width)
24558 ix86_expand_ashl_const (high[0], count - half_width, mode);
24559 }
24560 else
24561 {
24562 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24563
24564 if (!rtx_equal_p (operands[0], operands[1]))
24565 emit_move_insn (operands[0], operands[1]);
24566
24567 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
24568 ix86_expand_ashl_const (low[0], count, mode);
24569 }
24570 return;
24571 }
24572
24573 split_double_mode (mode, operands, 1, low, high);
24574
24575 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24576
24577 if (operands[1] == const1_rtx)
24578 {
24579 /* Assuming we've chosen QImode-capable registers, then 1 << N
24580 can be done with two 32/64-bit shifts, no branches, no cmoves. */
24581 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
24582 {
24583 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
24584
24585 ix86_expand_clear (low[0]);
24586 ix86_expand_clear (high[0]);
24587 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
24588
24589 d = gen_lowpart (QImode, low[0]);
24590 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24591 s = gen_rtx_EQ (QImode, flags, const0_rtx);
24592 emit_insn (gen_rtx_SET (d, s));
24593
24594 d = gen_lowpart (QImode, high[0]);
24595 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24596 s = gen_rtx_NE (QImode, flags, const0_rtx);
24597 emit_insn (gen_rtx_SET (d, s));
24598 }
24599
24600 /* Otherwise, we can get the same results by manually performing
24601 a bit extract operation on bit 5/6, and then performing the two
24602 shifts. The two methods of getting 0/1 into low/high are exactly
24603 the same size. Avoiding the shift in the bit extract case helps
24604 pentium4 a bit; no one else seems to care much either way. */
24605 else
24606 {
24607 machine_mode half_mode;
24608 rtx (*gen_lshr3)(rtx, rtx, rtx);
24609 rtx (*gen_and3)(rtx, rtx, rtx);
24610 rtx (*gen_xor3)(rtx, rtx, rtx);
24611 HOST_WIDE_INT bits;
24612 rtx x;
24613
24614 if (mode == DImode)
24615 {
24616 half_mode = SImode;
24617 gen_lshr3 = gen_lshrsi3;
24618 gen_and3 = gen_andsi3;
24619 gen_xor3 = gen_xorsi3;
24620 bits = 5;
24621 }
24622 else
24623 {
24624 half_mode = DImode;
24625 gen_lshr3 = gen_lshrdi3;
24626 gen_and3 = gen_anddi3;
24627 gen_xor3 = gen_xordi3;
24628 bits = 6;
24629 }
24630
24631 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
24632 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
24633 else
24634 x = gen_lowpart (half_mode, operands[2]);
24635 emit_insn (gen_rtx_SET (high[0], x));
24636
24637 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
24638 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
24639 emit_move_insn (low[0], high[0]);
24640 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
24641 }
24642
24643 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24644 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
24645 return;
24646 }
24647
24648 if (operands[1] == constm1_rtx)
24649 {
24650 /* For -1 << N, we can avoid the shld instruction, because we
24651 know that we're shifting 0...31/63 ones into a -1. */
24652 emit_move_insn (low[0], constm1_rtx);
24653 if (optimize_insn_for_size_p ())
24654 emit_move_insn (high[0], low[0]);
24655 else
24656 emit_move_insn (high[0], constm1_rtx);
24657 }
24658 else
24659 {
24660 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24661
24662 if (!rtx_equal_p (operands[0], operands[1]))
24663 emit_move_insn (operands[0], operands[1]);
24664
24665 split_double_mode (mode, operands, 1, low, high);
24666 emit_insn (gen_shld (high[0], low[0], operands[2]));
24667 }
24668
24669 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24670
24671 if (TARGET_CMOVE && scratch)
24672 {
24673 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24674 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24675
24676 ix86_expand_clear (scratch);
24677 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
24678 }
24679 else
24680 {
24681 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24682 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24683
24684 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
24685 }
24686 }
24687
24688 void
24689 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
24690 {
24691 rtx (*gen_ashr3)(rtx, rtx, rtx)
24692 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
24693 rtx (*gen_shrd)(rtx, rtx, rtx);
24694 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24695
24696 rtx low[2], high[2];
24697 int count;
24698
24699 if (CONST_INT_P (operands[2]))
24700 {
24701 split_double_mode (mode, operands, 2, low, high);
24702 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
24703
24704 if (count == GET_MODE_BITSIZE (mode) - 1)
24705 {
24706 emit_move_insn (high[0], high[1]);
24707 emit_insn (gen_ashr3 (high[0], high[0],
24708 GEN_INT (half_width - 1)));
24709 emit_move_insn (low[0], high[0]);
24710
24711 }
24712 else if (count >= half_width)
24713 {
24714 emit_move_insn (low[0], high[1]);
24715 emit_move_insn (high[0], low[0]);
24716 emit_insn (gen_ashr3 (high[0], high[0],
24717 GEN_INT (half_width - 1)));
24718
24719 if (count > half_width)
24720 emit_insn (gen_ashr3 (low[0], low[0],
24721 GEN_INT (count - half_width)));
24722 }
24723 else
24724 {
24725 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24726
24727 if (!rtx_equal_p (operands[0], operands[1]))
24728 emit_move_insn (operands[0], operands[1]);
24729
24730 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24731 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
24732 }
24733 }
24734 else
24735 {
24736 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24737
24738 if (!rtx_equal_p (operands[0], operands[1]))
24739 emit_move_insn (operands[0], operands[1]);
24740
24741 split_double_mode (mode, operands, 1, low, high);
24742
24743 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24744 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
24745
24746 if (TARGET_CMOVE && scratch)
24747 {
24748 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24749 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24750
24751 emit_move_insn (scratch, high[0]);
24752 emit_insn (gen_ashr3 (scratch, scratch,
24753 GEN_INT (half_width - 1)));
24754 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24755 scratch));
24756 }
24757 else
24758 {
24759 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
24760 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
24761
24762 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
24763 }
24764 }
24765 }
24766
24767 void
24768 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
24769 {
24770 rtx (*gen_lshr3)(rtx, rtx, rtx)
24771 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
24772 rtx (*gen_shrd)(rtx, rtx, rtx);
24773 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24774
24775 rtx low[2], high[2];
24776 int count;
24777
24778 if (CONST_INT_P (operands[2]))
24779 {
24780 split_double_mode (mode, operands, 2, low, high);
24781 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
24782
24783 if (count >= half_width)
24784 {
24785 emit_move_insn (low[0], high[1]);
24786 ix86_expand_clear (high[0]);
24787
24788 if (count > half_width)
24789 emit_insn (gen_lshr3 (low[0], low[0],
24790 GEN_INT (count - half_width)));
24791 }
24792 else
24793 {
24794 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24795
24796 if (!rtx_equal_p (operands[0], operands[1]))
24797 emit_move_insn (operands[0], operands[1]);
24798
24799 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24800 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
24801 }
24802 }
24803 else
24804 {
24805 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24806
24807 if (!rtx_equal_p (operands[0], operands[1]))
24808 emit_move_insn (operands[0], operands[1]);
24809
24810 split_double_mode (mode, operands, 1, low, high);
24811
24812 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24813 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
24814
24815 if (TARGET_CMOVE && scratch)
24816 {
24817 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24818 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24819
24820 ix86_expand_clear (scratch);
24821 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24822 scratch));
24823 }
24824 else
24825 {
24826 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24827 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24828
24829 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
24830 }
24831 }
24832 }
24833
24834 /* Predict just emitted jump instruction to be taken with probability PROB. */
24835 static void
24836 predict_jump (int prob)
24837 {
24838 rtx insn = get_last_insn ();
24839 gcc_assert (JUMP_P (insn));
24840 add_int_reg_note (insn, REG_BR_PROB, prob);
24841 }
24842
24843 /* Helper function for the string operations below. Test VARIABLE whether
24844 it is aligned to VALUE bytes. If true, jump to the label. */
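/* A sketch of the emitted test (illustrative): for VALUE == 4 this is
   essentially "tmp = variable & 4; if (tmp == 0) goto label;", so the
   label is reached when that bit of VARIABLE is clear.  */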
24845 static rtx_code_label *
24846 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
24847 {
24848 rtx_code_label *label = gen_label_rtx ();
24849 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
24850 if (GET_MODE (variable) == DImode)
24851 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
24852 else
24853 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
24854 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
24855 1, label);
24856 if (epilogue)
24857 predict_jump (REG_BR_PROB_BASE * 50 / 100);
24858 else
24859 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24860 return label;
24861 }
24862
24863 /* Adjust COUNTER by the VALUE. */
24864 static void
24865 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
24866 {
24867 rtx (*gen_add)(rtx, rtx, rtx)
24868 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
24869
24870 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
24871 }
24872
24873 /* Zero extend possibly SImode EXP to Pmode register. */
24874 rtx
24875 ix86_zero_extend_to_Pmode (rtx exp)
24876 {
24877 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
24878 }
24879
24880 /* Divide COUNTREG by SCALE. */
24881 static rtx
24882 scale_counter (rtx countreg, int scale)
24883 {
24884 rtx sc;
24885
24886 if (scale == 1)
24887 return countreg;
24888 if (CONST_INT_P (countreg))
24889 return GEN_INT (INTVAL (countreg) / scale);
24890 gcc_assert (REG_P (countreg));
24891
24892 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
24893 GEN_INT (exact_log2 (scale)),
24894 NULL, 1, OPTAB_DIRECT);
24895 return sc;
24896 }
24897
24898 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
24899 DImode for constant loop counts. */
24900
24901 static machine_mode
24902 counter_mode (rtx count_exp)
24903 {
24904 if (GET_MODE (count_exp) != VOIDmode)
24905 return GET_MODE (count_exp);
24906 if (!CONST_INT_P (count_exp))
24907 return Pmode;
24908 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
24909 return DImode;
24910 return SImode;
24911 }
24912
24913 /* Copy the address to a Pmode register. This is used for x32 to
24914 truncate DImode TLS address to a SImode register. */
24915
24916 static rtx
24917 ix86_copy_addr_to_reg (rtx addr)
24918 {
24919 rtx reg;
24920 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
24921 {
24922 reg = copy_addr_to_reg (addr);
24923 REG_POINTER (reg) = 1;
24924 return reg;
24925 }
24926 else
24927 {
24928 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
24929 reg = copy_to_mode_reg (DImode, addr);
24930 REG_POINTER (reg) = 1;
24931 return gen_rtx_SUBREG (SImode, reg, 0);
24932 }
24933 }
24934
24935 /* When ISSETMEM is FALSE, output a simple loop to copy memory pointed to by
24936 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
24937 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
24938 equivalent loop to set memory to VALUE (supposed to be in MODE).
24939
24940 The size is rounded down to a whole number of chunks moved at once.
24941 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
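/* A rough sketch of the emitted control flow for the !ISSETMEM case
   with UNROLL == 1 (illustrative only; the branch-prediction notes and
   mode conversions are omitted):

       size = count & -piece_size;
       iter = 0;
     top:
       *(MODE *) (dest + iter) = *(MODE *) (src + iter);
       iter += piece_size;
       if (iter < size) goto top;
       dest += iter;  src += iter;                                    */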
24942
24943
24944 static void
24945 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
24946 rtx destptr, rtx srcptr, rtx value,
24947 rtx count, machine_mode mode, int unroll,
24948 int expected_size, bool issetmem)
24949 {
24950 rtx_code_label *out_label, *top_label;
24951 rtx iter, tmp;
24952 machine_mode iter_mode = counter_mode (count);
24953 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
24954 rtx piece_size = GEN_INT (piece_size_n);
24955 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
24956 rtx size;
24957 int i;
24958
24959 top_label = gen_label_rtx ();
24960 out_label = gen_label_rtx ();
24961 iter = gen_reg_rtx (iter_mode);
24962
24963 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
24964 NULL, 1, OPTAB_DIRECT);
24965 /* Those two should combine. */
24966 if (piece_size == const1_rtx)
24967 {
24968 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
24969 true, out_label);
24970 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24971 }
24972 emit_move_insn (iter, const0_rtx);
24973
24974 emit_label (top_label);
24975
24976 tmp = convert_modes (Pmode, iter_mode, iter, true);
24977
24978 /* This assert could be relaxed - in this case we'll need to compute
24979 the smallest power of two containing PIECE_SIZE_N and pass it to
24980 offset_address. */
24981 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
24982 destmem = offset_address (destmem, tmp, piece_size_n);
24983 destmem = adjust_address (destmem, mode, 0);
24984
24985 if (!issetmem)
24986 {
24987 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
24988 srcmem = adjust_address (srcmem, mode, 0);
24989
24990 /* When unrolling for chips that reorder memory reads and writes,
24991 we can save registers by using a single temporary.
24992 Also using 4 temporaries is overkill in 32bit mode. */
24993 if (!TARGET_64BIT && 0)
24994 {
24995 for (i = 0; i < unroll; i++)
24996 {
24997 if (i)
24998 {
24999 destmem =
25000 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25001 srcmem =
25002 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25003 }
25004 emit_move_insn (destmem, srcmem);
25005 }
25006 }
25007 else
25008 {
25009 rtx tmpreg[4];
25010 gcc_assert (unroll <= 4);
25011 for (i = 0; i < unroll; i++)
25012 {
25013 tmpreg[i] = gen_reg_rtx (mode);
25014 if (i)
25015 {
25016 srcmem =
25017 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25018 }
25019 emit_move_insn (tmpreg[i], srcmem);
25020 }
25021 for (i = 0; i < unroll; i++)
25022 {
25023 if (i)
25024 {
25025 destmem =
25026 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25027 }
25028 emit_move_insn (destmem, tmpreg[i]);
25029 }
25030 }
25031 }
25032 else
25033 for (i = 0; i < unroll; i++)
25034 {
25035 if (i)
25036 destmem =
25037 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25038 emit_move_insn (destmem, value);
25039 }
25040
25041 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25042 true, OPTAB_LIB_WIDEN);
25043 if (tmp != iter)
25044 emit_move_insn (iter, tmp);
25045
25046 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
25047 true, top_label);
25048 if (expected_size != -1)
25049 {
25050 expected_size /= GET_MODE_SIZE (mode) * unroll;
25051 if (expected_size == 0)
25052 predict_jump (0);
25053 else if (expected_size > REG_BR_PROB_BASE)
25054 predict_jump (REG_BR_PROB_BASE - 1);
25055 else
25056 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25057 }
25058 else
25059 predict_jump (REG_BR_PROB_BASE * 80 / 100);
25060 iter = ix86_zero_extend_to_Pmode (iter);
25061 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25062 true, OPTAB_LIB_WIDEN);
25063 if (tmp != destptr)
25064 emit_move_insn (destptr, tmp);
25065 if (!issetmem)
25066 {
25067 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25068 true, OPTAB_LIB_WIDEN);
25069 if (tmp != srcptr)
25070 emit_move_insn (srcptr, tmp);
25071 }
25072 emit_label (out_label);
25073 }
25074
25075 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25076 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25077 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25078 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
25079 ORIG_VALUE is the original value passed to memset to fill the memory with.
25080 Other arguments have the same meaning as for the previous function. */
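/* As an illustration (Intel syntax, not a literal template): a memset
   of 16 zero bytes with a known count ends up roughly as
     xor eax, eax
     mov ecx, 4
     rep stosd
   i.e. the count register is pre-scaled by the 4-byte chunk size.  */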
25081
25082 static void
25083 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25084 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25085 rtx count,
25086 machine_mode mode, bool issetmem)
25087 {
25088 rtx destexp;
25089 rtx srcexp;
25090 rtx countreg;
25091 HOST_WIDE_INT rounded_count;
25092
25093 /* If possible, it is shorter to use rep movs.
25094 TODO: Maybe it is better to move this logic to decide_alg. */
25095 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25096 && (!issetmem || orig_value == const0_rtx))
25097 mode = SImode;
25098
25099 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25100 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
25101
25102 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25103 GET_MODE_SIZE (mode)));
25104 if (mode != QImode)
25105 {
25106 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25107 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25108 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25109 }
25110 else
25111 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
25112 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25113 {
25114 rounded_count
25115 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25116 destmem = shallow_copy_rtx (destmem);
25117 set_mem_size (destmem, rounded_count);
25118 }
25119 else if (MEM_SIZE_KNOWN_P (destmem))
25120 clear_mem_size (destmem);
25121
25122 if (issetmem)
25123 {
25124 value = force_reg (mode, gen_lowpart (mode, value));
25125 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
25126 }
25127 else
25128 {
25129 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25130 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25131 if (mode != QImode)
25132 {
25133 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25134 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25135 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25136 }
25137 else
25138 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25139 if (CONST_INT_P (count))
25140 {
25141 rounded_count
25142 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25143 srcmem = shallow_copy_rtx (srcmem);
25144 set_mem_size (srcmem, rounded_count);
25145 }
25146 else
25147 {
25148 if (MEM_SIZE_KNOWN_P (srcmem))
25149 clear_mem_size (srcmem);
25150 }
25151 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25152 destexp, srcexp));
25153 }
25154 }
25155
25156 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25157 DESTMEM.
25158 SRCMEM is passed by pointer so it can be updated on return.
25159 The return value is the updated DESTMEM. */
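/* For example (illustrative): a 4-byte SIZE_TO_MOVE typically becomes a
   single SImode load into a fresh temporary followed by an SImode
   store, after which DESTPTR and SRCPTR are both advanced by 4.  */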
25160 static rtx
25161 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25162 HOST_WIDE_INT size_to_move)
25163 {
25164 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25165 enum insn_code code;
25166 machine_mode move_mode;
25167 int piece_size, i;
25168
25169 /* Find the widest mode in which we could perform moves.
25170 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
25171 it until a move of that size is supported. */
25172 piece_size = 1 << floor_log2 (size_to_move);
25173 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25174 code = optab_handler (mov_optab, move_mode);
25175 while (code == CODE_FOR_nothing && piece_size > 1)
25176 {
25177 piece_size >>= 1;
25178 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25179 code = optab_handler (mov_optab, move_mode);
25180 }
25181
25182 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25183 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25184 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25185 {
25186 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25187 move_mode = mode_for_vector (word_mode, nunits);
25188 code = optab_handler (mov_optab, move_mode);
25189 if (code == CODE_FOR_nothing)
25190 {
25191 move_mode = word_mode;
25192 piece_size = GET_MODE_SIZE (move_mode);
25193 code = optab_handler (mov_optab, move_mode);
25194 }
25195 }
25196 gcc_assert (code != CODE_FOR_nothing);
25197
25198 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25199 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25200
25201 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
25202 gcc_assert (size_to_move % piece_size == 0);
25203 adjust = GEN_INT (piece_size);
25204 for (i = 0; i < size_to_move; i += piece_size)
25205 {
25206 /* We move from memory to memory, so we'll need to do it via
25207 a temporary register. */
25208 tempreg = gen_reg_rtx (move_mode);
25209 emit_insn (GEN_FCN (code) (tempreg, src));
25210 emit_insn (GEN_FCN (code) (dst, tempreg));
25211
25212 emit_move_insn (destptr,
25213 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25214 emit_move_insn (srcptr,
25215 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25216
25217 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25218 piece_size);
25219 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25220 piece_size);
25221 }
25222
25223 /* Update DST and SRC rtx. */
25224 *srcmem = src;
25225 return dst;
25226 }
25227
25228 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
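/* Worked example for the constant-count path below (derived from the code):
   with COUNT == 23 and MAX_SIZE == 16 the epilogue size is 23 % 16 == 7,
   so moves of 4, 2 and 1 bytes are emitted -- one for each bit set in 7.  */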
25229 static void
25230 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25231 rtx destptr, rtx srcptr, rtx count, int max_size)
25232 {
25233 rtx src, dest;
25234 if (CONST_INT_P (count))
25235 {
25236 HOST_WIDE_INT countval = INTVAL (count);
25237 HOST_WIDE_INT epilogue_size = countval % max_size;
25238 int i;
25239
25240 /* For now MAX_SIZE should be a power of 2. This assert could be
25241 relaxed, but it'll require a bit more complicated epilogue
25242 expanding. */
25243 gcc_assert ((max_size & (max_size - 1)) == 0);
25244 for (i = max_size; i >= 1; i >>= 1)
25245 {
25246 if (epilogue_size & i)
25247 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
25248 }
25249 return;
25250 }
25251 if (max_size > 8)
25252 {
25253 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25254 count, 1, OPTAB_DIRECT);
25255 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25256 count, QImode, 1, 4, false);
25257 return;
25258 }
25259
25260 /* When single stringop instructions are available, we can cheaply advance
25261 the dest and src pointers. Otherwise we save code size by maintaining an
25262 offset (zero is readily available from the preceding rep operation) and
25263 using x86 addressing modes. */
25264 if (TARGET_SINGLE_STRINGOP)
25265 {
25266 if (max_size > 4)
25267 {
25268 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25269 src = change_address (srcmem, SImode, srcptr);
25270 dest = change_address (destmem, SImode, destptr);
25271 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25272 emit_label (label);
25273 LABEL_NUSES (label) = 1;
25274 }
25275 if (max_size > 2)
25276 {
25277 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25278 src = change_address (srcmem, HImode, srcptr);
25279 dest = change_address (destmem, HImode, destptr);
25280 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25281 emit_label (label);
25282 LABEL_NUSES (label) = 1;
25283 }
25284 if (max_size > 1)
25285 {
25286 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25287 src = change_address (srcmem, QImode, srcptr);
25288 dest = change_address (destmem, QImode, destptr);
25289 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25290 emit_label (label);
25291 LABEL_NUSES (label) = 1;
25292 }
25293 }
25294 else
25295 {
25296 rtx offset = force_reg (Pmode, const0_rtx);
25297 rtx tmp;
25298
25299 if (max_size > 4)
25300 {
25301 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25302 src = change_address (srcmem, SImode, srcptr);
25303 dest = change_address (destmem, SImode, destptr);
25304 emit_move_insn (dest, src);
25305 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25306 true, OPTAB_LIB_WIDEN);
25307 if (tmp != offset)
25308 emit_move_insn (offset, tmp);
25309 emit_label (label);
25310 LABEL_NUSES (label) = 1;
25311 }
25312 if (max_size > 2)
25313 {
25314 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25315 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25316 src = change_address (srcmem, HImode, tmp);
25317 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25318 dest = change_address (destmem, HImode, tmp);
25319 emit_move_insn (dest, src);
25320 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25321 true, OPTAB_LIB_WIDEN);
25322 if (tmp != offset)
25323 emit_move_insn (offset, tmp);
25324 emit_label (label);
25325 LABEL_NUSES (label) = 1;
25326 }
25327 if (max_size > 1)
25328 {
25329 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25330 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25331 src = change_address (srcmem, QImode, tmp);
25332 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25333 dest = change_address (destmem, QImode, tmp);
25334 emit_move_insn (dest, src);
25335 emit_label (label);
25336 LABEL_NUSES (label) = 1;
25337 }
25338 }
25339 }
25340
25341 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
25342 with value PROMOTED_VAL.
25343 Return value is the updated DESTMEM. */
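/* Note on the loop below (a restatement of the code, not new behaviour):
   pieces up to the word size go through the i386 "strset" pattern, which also
   advances DESTPTR as a side effect; wider (vector) pieces are stored with a
   plain move and DESTPTR is advanced explicitly.  */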
25345 static rtx
25346 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
25347 HOST_WIDE_INT size_to_move)
25348 {
25349 rtx dst = destmem, adjust;
25350 enum insn_code code;
25351 machine_mode move_mode;
25352 int piece_size, i;
25353
25354 /* Find the widest mode in which we could perform moves.
25355 Start from the mode of PROMOTED_VAL and, if it is wider than
25356 SIZE_TO_MOVE, narrow it to a mode of that size. */
25357 move_mode = GET_MODE (promoted_val);
25358 if (move_mode == VOIDmode)
25359 move_mode = QImode;
25360 if (size_to_move < GET_MODE_SIZE (move_mode))
25361 {
25362 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
25363 promoted_val = gen_lowpart (move_mode, promoted_val);
25364 }
25365 piece_size = GET_MODE_SIZE (move_mode);
25366 code = optab_handler (mov_optab, move_mode);
25367 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
25368
25369 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25370
25371 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
25372 gcc_assert (size_to_move % piece_size == 0);
25373 adjust = GEN_INT (piece_size);
25374 for (i = 0; i < size_to_move; i += piece_size)
25375 {
25376 if (piece_size <= GET_MODE_SIZE (word_mode))
25377 {
25378 emit_insn (gen_strset (destptr, dst, promoted_val));
25379 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25380 piece_size);
25381 continue;
25382 }
25383
25384 emit_insn (GEN_FCN (code) (dst, promoted_val));
25385
25386 emit_move_insn (destptr,
25387 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25388
25389 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25390 piece_size);
25391 }
25392
25393 /* Update DST rtx. */
25394 return dst;
25395 }
25396 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
25397 static void
25398 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
25399 rtx count, int max_size)
25400 {
25401 count =
25402 expand_simple_binop (counter_mode (count), AND, count,
25403 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
25404 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
25405 gen_lowpart (QImode, value), count, QImode,
25406 1, max_size / 2, true);
25407 }
25408
25409 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
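/* Rough shape of the non-constant-count path below (derived from the code):
   for MAX_SIZE == 32 the expansion tests bits 16, 8, 4, 2 and 1 of COUNT in
   turn (via ix86_expand_aligntest) and emits correspondingly sized stores;
   larger MAX_SIZE values dispatch to expand_setmem_epilogue_via_loop
   instead.  */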
25410 static void
25411 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
25412 rtx count, int max_size)
25413 {
25414 rtx dest;
25415
25416 if (CONST_INT_P (count))
25417 {
25418 HOST_WIDE_INT countval = INTVAL (count);
25419 HOST_WIDE_INT epilogue_size = countval % max_size;
25420 int i;
25421
25422 /* For now MAX_SIZE should be a power of 2. This assert could be
25423 relaxed, but it'll require a bit more complicated epilogue
25424 expanding. */
25425 gcc_assert ((max_size & (max_size - 1)) == 0);
25426 for (i = max_size; i >= 1; i >>= 1)
25427 {
25428 if (epilogue_size & i)
25429 {
25430 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25431 destmem = emit_memset (destmem, destptr, vec_value, i);
25432 else
25433 destmem = emit_memset (destmem, destptr, value, i);
25434 }
25435 }
25436 return;
25437 }
25438 if (max_size > 32)
25439 {
25440 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
25441 return;
25442 }
25443 if (max_size > 16)
25444 {
25445 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
25446 if (TARGET_64BIT)
25447 {
25448 dest = change_address (destmem, DImode, destptr);
25449 emit_insn (gen_strset (destptr, dest, value));
25450 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
25451 emit_insn (gen_strset (destptr, dest, value));
25452 }
25453 else
25454 {
25455 dest = change_address (destmem, SImode, destptr);
25456 emit_insn (gen_strset (destptr, dest, value));
25457 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25458 emit_insn (gen_strset (destptr, dest, value));
25459 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
25460 emit_insn (gen_strset (destptr, dest, value));
25461 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
25462 emit_insn (gen_strset (destptr, dest, value));
25463 }
25464 emit_label (label);
25465 LABEL_NUSES (label) = 1;
25466 }
25467 if (max_size > 8)
25468 {
25469 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
25470 if (TARGET_64BIT)
25471 {
25472 dest = change_address (destmem, DImode, destptr);
25473 emit_insn (gen_strset (destptr, dest, value));
25474 }
25475 else
25476 {
25477 dest = change_address (destmem, SImode, destptr);
25478 emit_insn (gen_strset (destptr, dest, value));
25479 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25480 emit_insn (gen_strset (destptr, dest, value));
25481 }
25482 emit_label (label);
25483 LABEL_NUSES (label) = 1;
25484 }
25485 if (max_size > 4)
25486 {
25487 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25488 dest = change_address (destmem, SImode, destptr);
25489 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
25490 emit_label (label);
25491 LABEL_NUSES (label) = 1;
25492 }
25493 if (max_size > 2)
25494 {
25495 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25496 dest = change_address (destmem, HImode, destptr);
25497 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
25498 emit_label (label);
25499 LABEL_NUSES (label) = 1;
25500 }
25501 if (max_size > 1)
25502 {
25503 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25504 dest = change_address (destmem, QImode, destptr);
25505 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
25506 emit_label (label);
25507 LABEL_NUSES (label) = 1;
25508 }
25509 }
25510
25511 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
25512 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
25513 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
25514 ignored.
25515 Return value is updated DESTMEM. */
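/* Example of the effect of the loop below (derived from the code): with
   ALIGN == 1 and DESIRED_ALIGN == 16 it emits runtime tests of bits 1, 2, 4
   and 8 of DESTPTR, each guarding a copy or fill of that many bytes, so at
   most DESIRED_ALIGN - ALIGN == 15 bytes are handled here and the destination
   ends up 16-byte aligned.  */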
25516 static rtx
25517 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
25518 rtx destptr, rtx srcptr, rtx value,
25519 rtx vec_value, rtx count, int align,
25520 int desired_alignment, bool issetmem)
25521 {
25522 int i;
25523 for (i = 1; i < desired_alignment; i <<= 1)
25524 {
25525 if (align <= i)
25526 {
25527 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
25528 if (issetmem)
25529 {
25530 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25531 destmem = emit_memset (destmem, destptr, vec_value, i);
25532 else
25533 destmem = emit_memset (destmem, destptr, value, i);
25534 }
25535 else
25536 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
25537 ix86_adjust_counter (count, i);
25538 emit_label (label);
25539 LABEL_NUSES (label) = 1;
25540 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
25541 }
25542 }
25543 return destmem;
25544 }
25545
25546 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
25547 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
25548 and jump to DONE_LABEL. */
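/* How the sequence below covers SIZE..2*SIZE-1 bytes without a loop
   (restating the code): one group of moves handles bytes [0, SIZE) and a
   second group handles bytes [COUNT - SIZE, COUNT); the two groups overlap
   whenever COUNT < 2*SIZE, so every byte is written at least once.  */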
25549 static void
25550 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
25551 rtx destptr, rtx srcptr,
25552 rtx value, rtx vec_value,
25553 rtx count, int size,
25554 rtx done_label, bool issetmem)
25555 {
25556 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
25557 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
25558 rtx modesize;
25559 int n;
25560
25561 /* If we do not have vector value to copy, we must reduce size. */
25562 if (issetmem)
25563 {
25564 if (!vec_value)
25565 {
25566 if (GET_MODE (value) == VOIDmode && size > 8)
25567 mode = Pmode;
25568 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
25569 mode = GET_MODE (value);
25570 }
25571 else
25572 mode = GET_MODE (vec_value), value = vec_value;
25573 }
25574 else
25575 {
25576 /* Choose appropriate vector mode. */
25577 if (size >= 32)
25578 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
25579 else if (size >= 16)
25580 mode = TARGET_SSE ? V16QImode : DImode;
25581 srcmem = change_address (srcmem, mode, srcptr);
25582 }
25583 destmem = change_address (destmem, mode, destptr);
25584 modesize = GEN_INT (GET_MODE_SIZE (mode));
25585 gcc_assert (GET_MODE_SIZE (mode) <= size);
25586 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25587 {
25588 if (issetmem)
25589 emit_move_insn (destmem, gen_lowpart (mode, value));
25590 else
25591 {
25592 emit_move_insn (destmem, srcmem);
25593 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25594 }
25595 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25596 }
25597
25598 destmem = offset_address (destmem, count, 1);
25599 destmem = offset_address (destmem, GEN_INT (-2 * size),
25600 GET_MODE_SIZE (mode));
25601 if (!issetmem)
25602 {
25603 srcmem = offset_address (srcmem, count, 1);
25604 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
25605 GET_MODE_SIZE (mode));
25606 }
25607 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25608 {
25609 if (issetmem)
25610 emit_move_insn (destmem, gen_lowpart (mode, value));
25611 else
25612 {
25613 emit_move_insn (destmem, srcmem);
25614 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25615 }
25616 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25617 }
25618 emit_jump_insn (gen_jump (done_label));
25619 emit_barrier ();
25620
25621 emit_label (label);
25622 LABEL_NUSES (label) = 1;
25623 }
25624
25625 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
25626 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
25627 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
25628 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
25629 DONE_LABEL is a label after the whole copying sequence. The label is created
25630 on demand if *DONE_LABEL is NULL.
25631 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
25632 bounds after the initial copies.
25633
25634 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
25635 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
25636 we will dispatch to a library call for large blocks.
25637
25638 In pseudocode we do:
25639
25640 if (COUNT < SIZE)
25641 {
25642 Assume that SIZE is 4. Bigger sizes are handled analogously
25643 if (COUNT & 4)
25644 {
25645 copy 4 bytes from SRCPTR to DESTPTR
25646 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
25647 goto done_label
25648 }
25649 if (!COUNT)
25650 goto done_label;
25651 copy 1 byte from SRCPTR to DESTPTR
25652 if (COUNT & 2)
25653 {
25654 copy 2 bytes from SRCPTR to DESTPTR
25655 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
25656 }
25657 }
25658 else
25659 {
25660 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
25661 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
25662
25663 OLD_DESTPTR = DESTPTR;
25664 Align DESTPTR up to DESIRED_ALIGN
25665 SRCPTR += DESTPTR - OLD_DESTPTR
25666 COUNT -= DESTPTR - OLD_DESTPTR
25667 if (DYNAMIC_CHECK)
25668 Round COUNT down to multiple of SIZE
25669 << optional caller supplied zero size guard is here >>
25670 << optional caller supplied dynamic check is here >>
25671 << caller supplied main copy loop is here >>
25672 }
25673 done_label:
25674 */
25675 static void
25676 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
25677 rtx *destptr, rtx *srcptr,
25678 machine_mode mode,
25679 rtx value, rtx vec_value,
25680 rtx *count,
25681 rtx_code_label **done_label,
25682 int size,
25683 int desired_align,
25684 int align,
25685 unsigned HOST_WIDE_INT *min_size,
25686 bool dynamic_check,
25687 bool issetmem)
25688 {
25689 rtx_code_label *loop_label = NULL, *label;
25690 int n;
25691 rtx modesize;
25692 int prolog_size = 0;
25693 rtx mode_value;
25694
25695 /* Choose the proper value to copy. */
25696 if (issetmem && VECTOR_MODE_P (mode))
25697 mode_value = vec_value;
25698 else
25699 mode_value = value;
25700 gcc_assert (GET_MODE_SIZE (mode) <= size);
25701
25702 /* See if block is big or small, handle small blocks. */
25703 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
25704 {
25705 int size2 = size;
25706 loop_label = gen_label_rtx ();
25707
25708 if (!*done_label)
25709 *done_label = gen_label_rtx ();
25710
25711 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
25712 1, loop_label);
25713 size2 >>= 1;
25714
25715 /* Handle sizes > 3. */
25716 for (;size2 > 2; size2 >>= 1)
25717 expand_small_movmem_or_setmem (destmem, srcmem,
25718 *destptr, *srcptr,
25719 value, vec_value,
25720 *count,
25721 size2, *done_label, issetmem);
25722 /* Nothing to copy? Jump to DONE_LABEL if so */
25723 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
25724 1, *done_label);
25725
25726 /* Do a byte copy. */
25727 destmem = change_address (destmem, QImode, *destptr);
25728 if (issetmem)
25729 emit_move_insn (destmem, gen_lowpart (QImode, value));
25730 else
25731 {
25732 srcmem = change_address (srcmem, QImode, *srcptr);
25733 emit_move_insn (destmem, srcmem);
25734 }
25735
25736 /* Handle sizes 2 and 3. */
25737 label = ix86_expand_aligntest (*count, 2, false);
25738 destmem = change_address (destmem, HImode, *destptr);
25739 destmem = offset_address (destmem, *count, 1);
25740 destmem = offset_address (destmem, GEN_INT (-2), 2);
25741 if (issetmem)
25742 emit_move_insn (destmem, gen_lowpart (HImode, value));
25743 else
25744 {
25745 srcmem = change_address (srcmem, HImode, *srcptr);
25746 srcmem = offset_address (srcmem, *count, 1);
25747 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
25748 emit_move_insn (destmem, srcmem);
25749 }
25750
25751 emit_label (label);
25752 LABEL_NUSES (label) = 1;
25753 emit_jump_insn (gen_jump (*done_label));
25754 emit_barrier ();
25755 }
25756 else
25757 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
25758 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
25759
25760 /* Start memcpy for COUNT >= SIZE. */
25761 if (loop_label)
25762 {
25763 emit_label (loop_label);
25764 LABEL_NUSES (loop_label) = 1;
25765 }
25766
25767 /* Copy first desired_align bytes. */
25768 if (!issetmem)
25769 srcmem = change_address (srcmem, mode, *srcptr);
25770 destmem = change_address (destmem, mode, *destptr);
25771 modesize = GEN_INT (GET_MODE_SIZE (mode));
25772 for (n = 0; prolog_size < desired_align - align; n++)
25773 {
25774 if (issetmem)
25775 emit_move_insn (destmem, mode_value);
25776 else
25777 {
25778 emit_move_insn (destmem, srcmem);
25779 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25780 }
25781 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25782 prolog_size += GET_MODE_SIZE (mode);
25783 }
25784
25785
25786 /* Copy last SIZE bytes. */
25787 destmem = offset_address (destmem, *count, 1);
25788 destmem = offset_address (destmem,
25789 GEN_INT (-size - prolog_size),
25790 1);
25791 if (issetmem)
25792 emit_move_insn (destmem, mode_value);
25793 else
25794 {
25795 srcmem = offset_address (srcmem, *count, 1);
25796 srcmem = offset_address (srcmem,
25797 GEN_INT (-size - prolog_size),
25798 1);
25799 emit_move_insn (destmem, srcmem);
25800 }
25801 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
25802 {
25803 destmem = offset_address (destmem, modesize, 1);
25804 if (issetmem)
25805 emit_move_insn (destmem, mode_value);
25806 else
25807 {
25808 srcmem = offset_address (srcmem, modesize, 1);
25809 emit_move_insn (destmem, srcmem);
25810 }
25811 }
25812
25813 /* Align destination. */
25814 if (desired_align > 1 && desired_align > align)
25815 {
25816 rtx saveddest = *destptr;
25817
25818 gcc_assert (desired_align <= size);
25819 /* Align destptr up, placing it in a new register. */
25820 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
25821 GEN_INT (prolog_size),
25822 NULL_RTX, 1, OPTAB_DIRECT);
25823 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
25824 REG_POINTER (*destptr) = 1;
25825 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
25826 GEN_INT (-desired_align),
25827 *destptr, 1, OPTAB_DIRECT);
25828 /* See how many bytes we skipped. */
25829 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
25830 *destptr,
25831 saveddest, 1, OPTAB_DIRECT);
25832 /* Adjust srcptr and count. */
25833 if (!issetmem)
25834 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
25835 saveddest, *srcptr, 1, OPTAB_DIRECT);
25836 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25837 saveddest, *count, 1, OPTAB_DIRECT);
25838 /* We copied at most size + prolog_size. */
25839 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
25840 *min_size
25841 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
25842 else
25843 *min_size = 0;
25844
25845 /* Our loops always round down the block size, but for dispatch to a library
25846 call we need the precise value. */
25847 if (dynamic_check)
25848 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
25849 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
25850 }
25851 else
25852 {
25853 gcc_assert (prolog_size == 0);
25854 /* Decrease count, so we won't end up copying last word twice. */
25855 if (!CONST_INT_P (*count))
25856 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25857 constm1_rtx, *count, 1, OPTAB_DIRECT);
25858 else
25859 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
25860 (unsigned HOST_WIDE_INT)size));
25861 if (*min_size)
25862 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
25863 }
25864 }
25865
25866
25867 /* This function is like the previous one, except here we know how many bytes
25868 need to be copied. That allows us to update alignment not only of DST, which
25869 is returned, but also of SRC, which is passed as a pointer for that
25870 reason. */
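/* Example (derived from the code): with DESIRED_ALIGN == 16 and
   ALIGN_BYTES == 11 the loop below emits unconditional copies/fills of 1, 2
   and 8 bytes -- one for each bit set in ALIGN_BYTES -- after which DST is
   known to be 16-byte aligned and its MEM_ALIGN/MEM_SIZE are updated
   accordingly.  */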
25871 static rtx
25872 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
25873 rtx srcreg, rtx value, rtx vec_value,
25874 int desired_align, int align_bytes,
25875 bool issetmem)
25876 {
25877 rtx src = NULL;
25878 rtx orig_dst = dst;
25879 rtx orig_src = NULL;
25880 int piece_size = 1;
25881 int copied_bytes = 0;
25882
25883 if (!issetmem)
25884 {
25885 gcc_assert (srcp != NULL);
25886 src = *srcp;
25887 orig_src = src;
25888 }
25889
25890 for (piece_size = 1;
25891 piece_size <= desired_align && copied_bytes < align_bytes;
25892 piece_size <<= 1)
25893 {
25894 if (align_bytes & piece_size)
25895 {
25896 if (issetmem)
25897 {
25898 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
25899 dst = emit_memset (dst, destreg, vec_value, piece_size);
25900 else
25901 dst = emit_memset (dst, destreg, value, piece_size);
25902 }
25903 else
25904 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
25905 copied_bytes += piece_size;
25906 }
25907 }
25908 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
25909 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25910 if (MEM_SIZE_KNOWN_P (orig_dst))
25911 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
25912
25913 if (!issetmem)
25914 {
25915 int src_align_bytes = get_mem_align_offset (src, desired_align
25916 * BITS_PER_UNIT);
25917 if (src_align_bytes >= 0)
25918 src_align_bytes = desired_align - src_align_bytes;
25919 if (src_align_bytes >= 0)
25920 {
25921 unsigned int src_align;
25922 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
25923 {
25924 if ((src_align_bytes & (src_align - 1))
25925 == (align_bytes & (src_align - 1)))
25926 break;
25927 }
25928 if (src_align > (unsigned int) desired_align)
25929 src_align = desired_align;
25930 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
25931 set_mem_align (src, src_align * BITS_PER_UNIT);
25932 }
25933 if (MEM_SIZE_KNOWN_P (orig_src))
25934 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
25935 *srcp = src;
25936 }
25937
25938 return dst;
25939 }
25940
25941 /* Return true if ALG can be used in current context.
25942 Assume we expand memset if MEMSET is true. */
25943 static bool
25944 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
25945 {
25946 if (alg == no_stringop)
25947 return false;
25948 if (alg == vector_loop)
25949 return TARGET_SSE || TARGET_AVX;
25950 /* Algorithms using the rep prefix want at least edi and ecx;
25951 additionally, memset wants eax and memcpy wants esi. Don't
25952 consider such algorithms if the user has appropriated those
25953 registers for their own purposes, or if we have a non-default
25954 address space, since some string insns cannot override the segment. */
25955 if (alg == rep_prefix_1_byte
25956 || alg == rep_prefix_4_byte
25957 || alg == rep_prefix_8_byte)
25958 {
25959 if (have_as)
25960 return false;
25961 if (fixed_regs[CX_REG]
25962 || fixed_regs[DI_REG]
25963 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
25964 return false;
25965 }
25966 return true;
25967 }
25968
25969 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
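/* Overview of the decision below (a restatement of the code): the
   per-processor stringop_algs tables (cost->memcpy / cost->memset) list, for
   increasing size thresholds, which algorithm to use; unknown_size supplies
   the fallback when the block size is not a compile-time constant, and
   -mstringop-strategy / TARGET_INLINE_ALL_STRINGOPS can override the tables
   or force an inline expansion.  */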
25970 static enum stringop_alg
25971 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
25972 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
25973 bool memset, bool zero_memset, bool have_as,
25974 int *dynamic_check, bool *noalign)
25975 {
25976 const struct stringop_algs * algs;
25977 bool optimize_for_speed;
25978 int max = 0;
25979 const struct processor_costs *cost;
25980 int i;
25981 bool any_alg_usable_p = false;
25982
25983 *noalign = false;
25984 *dynamic_check = -1;
25985
25986 /* Even if the string operation call is cold, we still might spend a lot
25987 of time processing large blocks. */
25988 if (optimize_function_for_size_p (cfun)
25989 || (optimize_insn_for_size_p ()
25990 && (max_size < 256
25991 || (expected_size != -1 && expected_size < 256))))
25992 optimize_for_speed = false;
25993 else
25994 optimize_for_speed = true;
25995
25996 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
25997 if (memset)
25998 algs = &cost->memset[TARGET_64BIT != 0];
25999 else
26000 algs = &cost->memcpy[TARGET_64BIT != 0];
26001
26002 /* See maximal size for user defined algorithm. */
26003 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26004 {
26005 enum stringop_alg candidate = algs->size[i].alg;
26006 bool usable = alg_usable_p (candidate, memset, have_as);
26007 any_alg_usable_p |= usable;
26008
26009 if (candidate != libcall && candidate && usable)
26010 max = algs->size[i].max;
26011 }
26012
26013 /* If the expected size is not known but the maximal size is small enough
26014 that the inline version is a win, set the expected size into
26015 the range. */
26016 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26017 && expected_size == -1)
26018 expected_size = min_size / 2 + max_size / 2;
26019
26020 /* If the user specified the algorithm, honor it if possible. */
26021 if (ix86_stringop_alg != no_stringop
26022 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26023 return ix86_stringop_alg;
26024 /* rep; movq or rep; movl is the smallest variant. */
26025 else if (!optimize_for_speed)
26026 {
26027 *noalign = true;
26028 if (!count || (count & 3) || (memset && !zero_memset))
26029 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26030 ? rep_prefix_1_byte : loop_1_byte;
26031 else
26032 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26033 ? rep_prefix_4_byte : loop;
26034 }
26035 /* Very tiny blocks are best handled via the loop; REP is expensive to
26036 set up. */
26037 else if (expected_size != -1 && expected_size < 4)
26038 return loop_1_byte;
26039 else if (expected_size != -1)
26040 {
26041 enum stringop_alg alg = libcall;
26042 bool alg_noalign = false;
26043 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26044 {
26045 /* We get here if the algorithms that were not libcall-based
26046 were rep-prefix based and we are unable to use rep prefixes
26047 based on global register usage. Break out of the loop and
26048 use the heuristic below. */
26049 if (algs->size[i].max == 0)
26050 break;
26051 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26052 {
26053 enum stringop_alg candidate = algs->size[i].alg;
26054
26055 if (candidate != libcall
26056 && alg_usable_p (candidate, memset, have_as))
26057 {
26058 alg = candidate;
26059 alg_noalign = algs->size[i].noalign;
26060 }
26061 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26062 last non-libcall inline algorithm. */
26063 if (TARGET_INLINE_ALL_STRINGOPS)
26064 {
26065 /* When the current size is best to be copied by a libcall,
26066 but we are still forced to inline, run the heuristic below
26067 that will pick code for medium sized blocks. */
26068 if (alg != libcall)
26069 {
26070 *noalign = alg_noalign;
26071 return alg;
26072 }
26073 else if (!any_alg_usable_p)
26074 break;
26075 }
26076 else if (alg_usable_p (candidate, memset, have_as))
26077 {
26078 *noalign = algs->size[i].noalign;
26079 return candidate;
26080 }
26081 }
26082 }
26083 }
26084 /* When asked to inline the call anyway, try to pick a meaningful choice.
26085 We look for the maximal size of block that is faster to copy by hand and
26086 take blocks of at most that size, guessing that the average size will
26087 be roughly half of the maximum.
26088
26089 If this turns out to be bad, we might simply specify the preferred
26090 choice in ix86_costs. */
26091 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26092 && (algs->unknown_size == libcall
26093 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26094 {
26095 enum stringop_alg alg;
26096
26097 /* If there aren't any usable algorithms, then recursing on
26098 smaller sizes isn't going to find anything. Just return the
26099 simple byte-at-a-time copy loop. */
26100 if (!any_alg_usable_p)
26101 {
26102 /* Pick something reasonable. */
26103 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26104 *dynamic_check = 128;
26105 return loop_1_byte;
26106 }
26107 if (max <= 0)
26108 max = 4096;
26109 alg = decide_alg (count, max / 2, min_size, max_size, memset,
26110 zero_memset, have_as, dynamic_check, noalign);
26111 gcc_assert (*dynamic_check == -1);
26112 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26113 *dynamic_check = max;
26114 else
26115 gcc_assert (alg != libcall);
26116 return alg;
26117 }
26118 return (alg_usable_p (algs->unknown_size, memset, have_as)
26119 ? algs->unknown_size : libcall);
26120 }
26121
26122 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26123 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26124 static int
26125 decide_alignment (int align,
26126 enum stringop_alg alg,
26127 int expected_size,
26128 machine_mode move_mode)
26129 {
26130 int desired_align = 0;
26131
26132 gcc_assert (alg != no_stringop);
26133
26134 if (alg == libcall)
26135 return 0;
26136 if (move_mode == VOIDmode)
26137 return 0;
26138
26139 desired_align = GET_MODE_SIZE (move_mode);
26140 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
26141 copying a whole cache line at once. */
26142 if (TARGET_PENTIUMPRO
26143 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
26144 desired_align = 8;
26145
26146 if (optimize_size)
26147 desired_align = 1;
26148 if (desired_align < align)
26149 desired_align = align;
26150 if (expected_size != -1 && expected_size < 4)
26151 desired_align = align;
26152
26153 return desired_align;
26154 }
26155
26156
26157 /* Helper function for memset. For QImode value 0xXY produce
26158 0xXYXYXYXY of the width specified by MODE. This is essentially
26159 a * 0x01010101, but we can do slightly better than
26160 synth_mult by unwinding the sequence by hand on CPUs with
26161 slow multiply. */
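/* Examples of what the code below produces (derived from the code): a
   constant such as 0xAB is widened directly to 0xABABABAB (SImode) or
   0xABABABABABABABAB (DImode); for a non-constant value the choice between a
   multiply by the promoted constant 1 (0x01010101...) and a sequence of
   shift/or (or insv) steps is made from the processor cost tables.  */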
26162 static rtx
26163 promote_duplicated_reg (machine_mode mode, rtx val)
26164 {
26165 machine_mode valmode = GET_MODE (val);
26166 rtx tmp;
26167 int nops = mode == DImode ? 3 : 2;
26168
26169 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26170 if (val == const0_rtx)
26171 return copy_to_mode_reg (mode, CONST0_RTX (mode));
26172 if (CONST_INT_P (val))
26173 {
26174 HOST_WIDE_INT v = INTVAL (val) & 255;
26175
26176 v |= v << 8;
26177 v |= v << 16;
26178 if (mode == DImode)
26179 v |= (v << 16) << 16;
26180 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26181 }
26182
26183 if (valmode == VOIDmode)
26184 valmode = QImode;
26185 if (valmode != QImode)
26186 val = gen_lowpart (QImode, val);
26187 if (mode == QImode)
26188 return val;
26189 if (!TARGET_PARTIAL_REG_STALL)
26190 nops--;
26191 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26192 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26193 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26194 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26195 {
26196 rtx reg = convert_modes (mode, QImode, val, true);
26197 tmp = promote_duplicated_reg (mode, const1_rtx);
26198 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
26199 OPTAB_DIRECT);
26200 }
26201 else
26202 {
26203 rtx reg = convert_modes (mode, QImode, val, true);
26204
26205 if (!TARGET_PARTIAL_REG_STALL)
26206 if (mode == SImode)
26207 emit_insn (gen_insvsi_1 (reg, reg));
26208 else
26209 emit_insn (gen_insvdi_1 (reg, reg));
26210 else
26211 {
26212 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26213 NULL, 1, OPTAB_DIRECT);
26214 reg =
26215 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26216 }
26217 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26218 NULL, 1, OPTAB_DIRECT);
26219 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26220 if (mode == SImode)
26221 return reg;
26222 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26223 NULL, 1, OPTAB_DIRECT);
26224 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26225 return reg;
26226 }
26227 }
26228
26229 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
26230 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
26231 alignment from ALIGN to DESIRED_ALIGN. */
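/* For instance (restating the conditions below), a memset expanded with
   8-byte chunks on a 64-bit target gets a DImode broadcast of VAL, while a
   2-byte chunk size with no extra alignment work only needs an HImode
   broadcast.  */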
26232 static rtx
26233 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
26234 int align)
26235 {
26236 rtx promoted_val;
26237
26238 if (TARGET_64BIT
26239 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26240 promoted_val = promote_duplicated_reg (DImode, val);
26241 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26242 promoted_val = promote_duplicated_reg (SImode, val);
26243 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26244 promoted_val = promote_duplicated_reg (HImode, val);
26245 else
26246 promoted_val = val;
26247
26248 return promoted_val;
26249 }
26250
26251 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
26252 operations when profitable. The code depends upon architecture, block size
26253 and alignment, but always has one of the following overall structures:
26254
26255 Aligned move sequence:
26256
26257 1) Prologue guard: Conditional that jumps up to epilogues for small
26258 blocks that can be handled by the epilogue alone. This is faster
26259 but also needed for correctness, since the prologue assumes the block
26260 is larger than the desired alignment.
26261
26262 Optional dynamic check for size and libcall for large
26263 blocks is emitted here too, with -minline-stringops-dynamically.
26264
26265 2) Prologue: copy first few bytes in order to get destination
26266 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26267 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26268 copied. We emit either a jump tree on power of two sized
26269 blocks, or a byte loop.
26270
26271 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26272 with specified algorithm.
26273
26274 4) Epilogue: code copying tail of the block that is too small to be
26275 handled by main body (or up to size guarded by prologue guard).
26276
26277 Misaligned move sequence
26278
26279 1) misaligned move prologue/epilogue containing:
26280 a) Prologue handling small memory blocks and jumping to done_label
26281 (skipped if blocks are known to be large enough)
26282 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes, if alignment
26283 is needed, by a single possibly misaligned move
26284 (skipped if alignment is not needed)
26285 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
26286
26287 2) Zero size guard dispatching to done_label, if needed
26288
26289 3) dispatch to library call, if needed,
26290
26291 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26292 with the specified algorithm. */
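/* A concrete shape this takes (an illustrative sketch; exact details depend
   on the target and chosen algorithm): a memset of an unknown byte count
   expanded with the unrolled_loop algorithm on x86-64 emits roughly
     - a guard comparing the count against 32 (4 unrolled DImode stores),
     - a prologue aligning the destination,
     - the 4x-unrolled word-store main loop, and
     - a jump-tree epilogue for the remaining 0..31 bytes.  */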
26293 bool
26294 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26295 rtx align_exp, rtx expected_align_exp,
26296 rtx expected_size_exp, rtx min_size_exp,
26297 rtx max_size_exp, rtx probable_max_size_exp,
26298 bool issetmem)
26299 {
26300 rtx destreg;
26301 rtx srcreg = NULL;
26302 rtx_code_label *label = NULL;
26303 rtx tmp;
26304 rtx_code_label *jump_around_label = NULL;
26305 HOST_WIDE_INT align = 1;
26306 unsigned HOST_WIDE_INT count = 0;
26307 HOST_WIDE_INT expected_size = -1;
26308 int size_needed = 0, epilogue_size_needed;
26309 int desired_align = 0, align_bytes = 0;
26310 enum stringop_alg alg;
26311 rtx promoted_val = NULL;
26312 rtx vec_promoted_val = NULL;
26313 bool force_loopy_epilogue = false;
26314 int dynamic_check;
26315 bool need_zero_guard = false;
26316 bool noalign;
26317 machine_mode move_mode = VOIDmode;
26318 int unroll_factor = 1;
26319 /* TODO: Once value ranges are available, fill in proper data. */
26320 unsigned HOST_WIDE_INT min_size = 0;
26321 unsigned HOST_WIDE_INT max_size = -1;
26322 unsigned HOST_WIDE_INT probable_max_size = -1;
26323 bool misaligned_prologue_used = false;
26324 bool have_as;
26325
26326 if (CONST_INT_P (align_exp))
26327 align = INTVAL (align_exp);
26328 /* i386 can do misaligned accesses at a reasonably increased cost. */
26329 if (CONST_INT_P (expected_align_exp)
26330 && INTVAL (expected_align_exp) > align)
26331 align = INTVAL (expected_align_exp);
26332 /* ALIGN is the minimum of destination and source alignment, but we care here
26333 just about destination alignment. */
26334 else if (!issetmem
26335 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26336 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26337
26338 if (CONST_INT_P (count_exp))
26339 {
26340 min_size = max_size = probable_max_size = count = expected_size
26341 = INTVAL (count_exp);
26342 /* When COUNT is 0, there is nothing to do. */
26343 if (!count)
26344 return true;
26345 }
26346 else
26347 {
26348 if (min_size_exp)
26349 min_size = INTVAL (min_size_exp);
26350 if (max_size_exp)
26351 max_size = INTVAL (max_size_exp);
26352 if (probable_max_size_exp)
26353 probable_max_size = INTVAL (probable_max_size_exp);
26354 if (CONST_INT_P (expected_size_exp))
26355 expected_size = INTVAL (expected_size_exp);
26356 }
26357
26358 /* Make sure we don't need to care about overflow later on. */
26359 if (count > (HOST_WIDE_INT_1U << 30))
26360 return false;
26361
26362 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
26363 if (!issetmem)
26364 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
26365
26366 /* Step 0: Decide on preferred algorithm, desired alignment and
26367 size of chunks to be copied by main loop. */
26368 alg = decide_alg (count, expected_size, min_size, probable_max_size,
26369 issetmem,
26370 issetmem && val_exp == const0_rtx, have_as,
26371 &dynamic_check, &noalign);
26372 if (alg == libcall)
26373 return false;
26374 gcc_assert (alg != no_stringop);
26375
26376 /* For now the vector version of memset is generated only for memory zeroing,
26377 as creating the promoted vector value is very cheap in this case. */
26378 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
26379 alg = unrolled_loop;
26380
26381 if (!count)
26382 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
26383 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
26384 if (!issetmem)
26385 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
26386
26387 unroll_factor = 1;
26388 move_mode = word_mode;
26389 switch (alg)
26390 {
26391 case libcall:
26392 case no_stringop:
26393 case last_alg:
26394 gcc_unreachable ();
26395 case loop_1_byte:
26396 need_zero_guard = true;
26397 move_mode = QImode;
26398 break;
26399 case loop:
26400 need_zero_guard = true;
26401 break;
26402 case unrolled_loop:
26403 need_zero_guard = true;
26404 unroll_factor = (TARGET_64BIT ? 4 : 2);
26405 break;
26406 case vector_loop:
26407 need_zero_guard = true;
26408 unroll_factor = 4;
26409 /* Find the widest supported mode. */
26410 move_mode = word_mode;
26411 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
26412 != CODE_FOR_nothing)
26413 move_mode = GET_MODE_WIDER_MODE (move_mode);
26414
26415 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26416 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26417 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26418 {
26419 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26420 move_mode = mode_for_vector (word_mode, nunits);
26421 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
26422 move_mode = word_mode;
26423 }
26424 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
26425 break;
26426 case rep_prefix_8_byte:
26427 move_mode = DImode;
26428 break;
26429 case rep_prefix_4_byte:
26430 move_mode = SImode;
26431 break;
26432 case rep_prefix_1_byte:
26433 move_mode = QImode;
26434 break;
26435 }
26436 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
26437 epilogue_size_needed = size_needed;
26438
26439 desired_align = decide_alignment (align, alg, expected_size, move_mode);
26440 if (!TARGET_ALIGN_STRINGOPS || noalign)
26441 align = desired_align;
26442
26443 /* Step 1: Prologue guard. */
26444
26445 /* Alignment code needs count to be in register. */
26446 if (CONST_INT_P (count_exp) && desired_align > align)
26447 {
26448 if (INTVAL (count_exp) > desired_align
26449 && INTVAL (count_exp) > size_needed)
26450 {
26451 align_bytes
26452 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
26453 if (align_bytes <= 0)
26454 align_bytes = 0;
26455 else
26456 align_bytes = desired_align - align_bytes;
26457 }
26458 if (align_bytes == 0)
26459 count_exp = force_reg (counter_mode (count_exp), count_exp);
26460 }
26461 gcc_assert (desired_align >= 1 && align >= 1);
26462
26463 /* Misaligned move sequences handle both prologue and epilogue at once.
26464 Default code generation results in smaller code for large alignments
26465 and also avoids redundant work when sizes are known precisely. */
26466 misaligned_prologue_used
26467 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
26468 && MAX (desired_align, epilogue_size_needed) <= 32
26469 && desired_align <= epilogue_size_needed
26470 && ((desired_align > align && !align_bytes)
26471 || (!count && epilogue_size_needed > 1)));
26472
26473 /* Do the cheap promotion to allow better CSE across the
26474 main loop and epilogue (i.e. one load of the big constant in
26475 front of all the code).
26476 For now the misaligned move sequences do not have a fast path
26477 without broadcasting. */
26478 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
26479 {
26480 if (alg == vector_loop)
26481 {
26482 gcc_assert (val_exp == const0_rtx);
26483 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
26484 promoted_val = promote_duplicated_reg_to_size (val_exp,
26485 GET_MODE_SIZE (word_mode),
26486 desired_align, align);
26487 }
26488 else
26489 {
26490 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26491 desired_align, align);
26492 }
26493 }
26494 /* Misaligned move sequences handle both prologues and epilogues at once.
26495 Default code generation results in smaller code for large alignments and
26496 also avoids redundant work when sizes are known precisely. */
26497 if (misaligned_prologue_used)
26498 {
26499 /* The misaligned move prologue handled small blocks by itself. */
26500 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
26501 (dst, src, &destreg, &srcreg,
26502 move_mode, promoted_val, vec_promoted_val,
26503 &count_exp,
26504 &jump_around_label,
26505 desired_align < align
26506 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
26507 desired_align, align, &min_size, dynamic_check, issetmem);
26508 if (!issetmem)
26509 src = change_address (src, BLKmode, srcreg);
26510 dst = change_address (dst, BLKmode, destreg);
26511 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26512 epilogue_size_needed = 0;
26513 if (need_zero_guard
26514 && min_size < (unsigned HOST_WIDE_INT) size_needed)
26515 {
26516 /* It is possible that we copied enough so the main loop will not
26517 execute. */
26518 gcc_assert (size_needed > 1);
26519 if (jump_around_label == NULL_RTX)
26520 jump_around_label = gen_label_rtx ();
26521 emit_cmp_and_jump_insns (count_exp,
26522 GEN_INT (size_needed),
26523 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
26524 if (expected_size == -1
26525 || expected_size < (desired_align - align) / 2 + size_needed)
26526 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26527 else
26528 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26529 }
26530 }
26531 /* Ensure that alignment prologue won't copy past end of block. */
26532 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
26533 {
26534 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
26535 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
26536 Make sure it is power of 2. */
26537 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
26538
26539 /* To improve performance of small blocks, we jump around the VAL
26540 promoting code. This means that if the promoted VAL is not a constant,
26541 we might not use it in the epilogue and have to use the byte
26542 loop variant. */
26543 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
26544 force_loopy_epilogue = true;
26545 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26546 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26547 {
26548 /* If main algorithm works on QImode, no epilogue is needed.
26549 For small sizes just don't align anything. */
26550 if (size_needed == 1)
26551 desired_align = align;
26552 else
26553 goto epilogue;
26554 }
26555 else if (!count
26556 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26557 {
26558 label = gen_label_rtx ();
26559 emit_cmp_and_jump_insns (count_exp,
26560 GEN_INT (epilogue_size_needed),
26561 LTU, 0, counter_mode (count_exp), 1, label);
26562 if (expected_size == -1 || expected_size < epilogue_size_needed)
26563 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26564 else
26565 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26566 }
26567 }
26568
26569 /* Emit code to decide on runtime whether library call or inline should be
26570 used. */
26571 if (dynamic_check != -1)
26572 {
26573 if (!issetmem && CONST_INT_P (count_exp))
26574 {
26575 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
26576 {
26577 emit_block_move_via_libcall (dst, src, count_exp, false);
26578 count_exp = const0_rtx;
26579 goto epilogue;
26580 }
26581 }
26582 else
26583 {
26584 rtx_code_label *hot_label = gen_label_rtx ();
26585 if (jump_around_label == NULL_RTX)
26586 jump_around_label = gen_label_rtx ();
26587 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
26588 LEU, 0, counter_mode (count_exp),
26589 1, hot_label);
26590 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26591 if (issetmem)
26592 set_storage_via_libcall (dst, count_exp, val_exp, false);
26593 else
26594 emit_block_move_via_libcall (dst, src, count_exp, false);
26595 emit_jump (jump_around_label);
26596 emit_label (hot_label);
26597 }
26598 }
26599
26600 /* Step 2: Alignment prologue. */
26601 /* Do the expensive promotion once we branched off the small blocks. */
26602 if (issetmem && !promoted_val)
26603 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26604 desired_align, align);
26605
26606 if (desired_align > align && !misaligned_prologue_used)
26607 {
26608 if (align_bytes == 0)
26609 {
26610 /* Except for the first move in the prologue, we no longer know
26611 the constant offset in aliasing info. It doesn't seem worth
26612 the pain to maintain it for the first move, so throw away
26613 the info early. */
26614 dst = change_address (dst, BLKmode, destreg);
26615 if (!issetmem)
26616 src = change_address (src, BLKmode, srcreg);
26617 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
26618 promoted_val, vec_promoted_val,
26619 count_exp, align, desired_align,
26620 issetmem);
26621 /* At most desired_align - align bytes are copied. */
26622 if (min_size < (unsigned)(desired_align - align))
26623 min_size = 0;
26624 else
26625 min_size -= desired_align - align;
26626 }
26627 else
26628 {
26629 /* If we know how many bytes need to be stored before dst is
26630 sufficiently aligned, maintain aliasing info accurately. */
26631 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
26632 srcreg,
26633 promoted_val,
26634 vec_promoted_val,
26635 desired_align,
26636 align_bytes,
26637 issetmem);
26638
26639 count_exp = plus_constant (counter_mode (count_exp),
26640 count_exp, -align_bytes);
26641 count -= align_bytes;
26642 min_size -= align_bytes;
26643 max_size -= align_bytes;
26644 }
26645 if (need_zero_guard
26646 && min_size < (unsigned HOST_WIDE_INT) size_needed
26647 && (count < (unsigned HOST_WIDE_INT) size_needed
26648 || (align_bytes == 0
26649 && count < ((unsigned HOST_WIDE_INT) size_needed
26650 + desired_align - align))))
26651 {
26652 /* It is possible that we copied enough so the main loop will not
26653 execute. */
26654 gcc_assert (size_needed > 1);
26655 if (label == NULL_RTX)
26656 label = gen_label_rtx ();
26657 emit_cmp_and_jump_insns (count_exp,
26658 GEN_INT (size_needed),
26659 LTU, 0, counter_mode (count_exp), 1, label);
26660 if (expected_size == -1
26661 || expected_size < (desired_align - align) / 2 + size_needed)
26662 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26663 else
26664 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26665 }
26666 }
26667 if (label && size_needed == 1)
26668 {
26669 emit_label (label);
26670 LABEL_NUSES (label) = 1;
26671 label = NULL;
26672 epilogue_size_needed = 1;
26673 if (issetmem)
26674 promoted_val = val_exp;
26675 }
26676 else if (label == NULL_RTX && !misaligned_prologue_used)
26677 epilogue_size_needed = size_needed;
26678
26679 /* Step 3: Main loop. */
26680
26681 switch (alg)
26682 {
26683 case libcall:
26684 case no_stringop:
26685 case last_alg:
26686 gcc_unreachable ();
26687 case loop_1_byte:
26688 case loop:
26689 case unrolled_loop:
26690 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
26691 count_exp, move_mode, unroll_factor,
26692 expected_size, issetmem);
26693 break;
26694 case vector_loop:
26695 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
26696 vec_promoted_val, count_exp, move_mode,
26697 unroll_factor, expected_size, issetmem);
26698 break;
26699 case rep_prefix_8_byte:
26700 case rep_prefix_4_byte:
26701 case rep_prefix_1_byte:
26702 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
26703 val_exp, count_exp, move_mode, issetmem);
26704 break;
26705 }
26706 /* Properly adjust the offsets of src and dest memory for aliasing. */
26707 if (CONST_INT_P (count_exp))
26708 {
26709 if (!issetmem)
26710 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
26711 (count / size_needed) * size_needed);
26712 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
26713 (count / size_needed) * size_needed);
26714 }
26715 else
26716 {
26717 if (!issetmem)
26718 src = change_address (src, BLKmode, srcreg);
26719 dst = change_address (dst, BLKmode, destreg);
26720 }
26721
26722 /* Step 4: Epilogue to copy the remaining bytes. */
26723 epilogue:
26724 if (label)
26725 {
26726 /* When the main loop is done, COUNT_EXP might hold the original count,
26727 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
26728 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
26729 bytes. Compensate if needed. */
26730
26731 if (size_needed < epilogue_size_needed)
26732 {
26733 tmp =
26734 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
26735 GEN_INT (size_needed - 1), count_exp, 1,
26736 OPTAB_DIRECT);
26737 if (tmp != count_exp)
26738 emit_move_insn (count_exp, tmp);
26739 }
26740 emit_label (label);
26741 LABEL_NUSES (label) = 1;
26742 }
26743
26744 if (count_exp != const0_rtx && epilogue_size_needed > 1)
26745 {
26746 if (force_loopy_epilogue)
26747 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
26748 epilogue_size_needed);
26749 else
26750 {
26751 if (issetmem)
26752 expand_setmem_epilogue (dst, destreg, promoted_val,
26753 vec_promoted_val, count_exp,
26754 epilogue_size_needed);
26755 else
26756 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
26757 epilogue_size_needed);
26758 }
26759 }
26760 if (jump_around_label)
26761 emit_label (jump_around_label);
26762 return true;
26763 }
26764
26765
26766 /* Expand the appropriate insns for doing strlen if not just doing
26767 repnz; scasb
26768
26769 out = result, initialized with the start address
26770 align_rtx = alignment of the address.
26771 scratch = scratch register, initialized with the start address when
26772 not aligned, otherwise undefined
26773
26774 This is just the body. It needs the initializations mentioned above and
26775 some address computing at the end. These things are done in i386.md. */
26776
26777 static void
26778 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
26779 {
26780 int align;
26781 rtx tmp;
26782 rtx_code_label *align_2_label = NULL;
26783 rtx_code_label *align_3_label = NULL;
26784 rtx_code_label *align_4_label = gen_label_rtx ();
26785 rtx_code_label *end_0_label = gen_label_rtx ();
26786 rtx mem;
26787 rtx tmpreg = gen_reg_rtx (SImode);
26788 rtx scratch = gen_reg_rtx (SImode);
26789 rtx cmp;
26790
26791 align = 0;
26792 if (CONST_INT_P (align_rtx))
26793 align = INTVAL (align_rtx);
26794
26795 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
26796
26797 /* Is there a known alignment and is it less than 4? */
26798 if (align < 4)
26799 {
26800 rtx scratch1 = gen_reg_rtx (Pmode);
26801 emit_move_insn (scratch1, out);
26802 /* Is there a known alignment and is it not 2? */
26803 if (align != 2)
26804 {
26805 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
26806 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
26807
26808 /* Keep just the two low bits of the address. */
26809 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
26810 NULL_RTX, 0, OPTAB_WIDEN);
26811
26812 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26813 Pmode, 1, align_4_label);
26814 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
26815 Pmode, 1, align_2_label);
26816 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
26817 Pmode, 1, align_3_label);
26818 }
26819 else
26820 {
26821 /* Since the alignment is 2, we have to check 2 or 0 bytes;
26822 check whether it is already aligned to a 4-byte boundary. */
26823
26824 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
26825 NULL_RTX, 0, OPTAB_WIDEN);
26826
26827 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26828 Pmode, 1, align_4_label);
26829 }
26830
26831 mem = change_address (src, QImode, out);
26832
26833 /* Now compare the bytes. */
26834
26835 /* Compare the first n unaligned bytes one byte at a time. */
26836 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
26837 QImode, 1, end_0_label);
26838
26839 /* Increment the address. */
26840 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26841
26842 /* Not needed with an alignment of 2 */
26843 if (align != 2)
26844 {
26845 emit_label (align_2_label);
26846
26847 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26848 end_0_label);
26849
26850 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26851
26852 emit_label (align_3_label);
26853 }
26854
26855 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26856 end_0_label);
26857
26858 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26859 }
26860
26861 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
26862 align this loop; that only enlarges the program and does not help
26863 speed. */
26864 emit_label (align_4_label);
26865
26866 mem = change_address (src, SImode, out);
26867 emit_move_insn (scratch, mem);
26868 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
26869
26870 /* This formula yields a nonzero result iff one of the bytes is zero.
26871 This saves three branches inside the loop and many cycles. */
26872
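/* I.e., tmpreg = (word - 0x01010101) & ~word & 0x80808080, which is
   nonzero exactly when WORD (here SCRATCH) contains a zero byte. */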
26873 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
26874 emit_insn (gen_one_cmplsi2 (scratch, scratch));
26875 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
26876 emit_insn (gen_andsi3 (tmpreg, tmpreg,
26877 gen_int_mode (0x80808080, SImode)));
26878 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
26879 align_4_label);
26880
26881 if (TARGET_CMOVE)
26882 {
26883 rtx reg = gen_reg_rtx (SImode);
26884 rtx reg2 = gen_reg_rtx (Pmode);
26885 emit_move_insn (reg, tmpreg);
26886 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
26887
26888 /* If zero is not in the first two bytes, move two bytes forward. */
26889 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26890 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26891 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26892 emit_insn (gen_rtx_SET (tmpreg,
26893 gen_rtx_IF_THEN_ELSE (SImode, tmp,
26894 reg,
26895 tmpreg)));
26896 /* Emit lea manually to avoid clobbering of flags. */
26897 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
26898
26899 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26900 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26901 emit_insn (gen_rtx_SET (out,
26902 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
26903 reg2,
26904 out)));
26905 }
26906 else
26907 {
26908 rtx_code_label *end_2_label = gen_label_rtx ();
26909 /* Is zero in the first two bytes? */
26910
26911 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26912 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26913 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
26914 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26915 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
26916 pc_rtx);
26917 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
26918 JUMP_LABEL (tmp) = end_2_label;
26919
26920 /* Not in the first two. Move two bytes forward. */
26921 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
26922 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
26923
26924 emit_label (end_2_label);
26925
26926 }
26927
26928 /* Avoid branch in fixing the byte. */
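/* OUT now points 4 bytes past the two-byte group holding the zero byte,
   and bit 7 of the low byte of TMPREG marks that group's first byte.
   Doubling TMPREG sets the carry exactly when that marker is set, so the
   subtract-with-carry below produces OUT - 4 or OUT - 3 as needed. */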
26929 tmpreg = gen_lowpart (QImode, tmpreg);
26930 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
26931 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
26932 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
26933 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
26934
26935 emit_label (end_0_label);
26936 }
26937
26938 /* Expand strlen. */
26939
26940 bool
26941 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
26942 {
26943 rtx addr, scratch1, scratch2, scratch3, scratch4;
26944
26945 /* The generic case of the strlen expander is long. Avoid expanding
26946 it unless TARGET_INLINE_ALL_STRINGOPS. */
26947
26948 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26949 && !TARGET_INLINE_ALL_STRINGOPS
26950 && !optimize_insn_for_size_p ()
26951 && (!CONST_INT_P (align) || INTVAL (align) < 4))
26952 return false;
26953
26954 addr = force_reg (Pmode, XEXP (src, 0));
26955 scratch1 = gen_reg_rtx (Pmode);
26956
26957 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26958 && !optimize_insn_for_size_p ())
26959 {
26960 /* It seems the optimizer does not combine a call like
26961 foo(strlen(bar), strlen(bar));
26962 when the move and the subtraction are done here. It calculates
26963 the length just once when these instructions are emitted inside
26964 output_strlen_unroll(). But since &bar[strlen(bar)] is often
26965 used and this uses one fewer register for the lifetime of
26966 output_strlen_unroll(), this is better. */
26967
26968 emit_move_insn (out, addr);
26969
26970 ix86_expand_strlensi_unroll_1 (out, src, align);
26971
26972 /* strlensi_unroll_1 returns the address of the zero at the end of
26973 the string, like memchr(), so compute the length by subtracting
26974 the start address. */
26975 emit_insn (ix86_gen_sub3 (out, out, addr));
26976 }
26977 else
26978 {
26979 rtx unspec;
26980
26981 /* Can't use this if the user has appropriated eax, ecx, or edi. */
26982 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
26983 return false;
26984 /* Can't use this for non-default address spaces. */
26985 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
26986 return false;
26987
26988 scratch2 = gen_reg_rtx (Pmode);
26989 scratch3 = gen_reg_rtx (Pmode);
26990 scratch4 = force_reg (Pmode, constm1_rtx);
26991
26992 emit_move_insn (scratch3, addr);
26993 eoschar = force_reg (QImode, eoschar);
26994
26995 src = replace_equiv_address_nv (src, scratch3);
26996
26997 /* If .md starts supporting :P, this can be done in .md. */
26998 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
26999 scratch4), UNSPEC_SCAS);
27000 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
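/* After the repnz scasb in strlenqi_1, SCRATCH1 (ECX) started at -1 and
   was decremented once per scanned byte including the terminator, so the
   length is ~SCRATCH1 - 1, computed below. */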
27001 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27002 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27003 }
27004 return true;
27005 }
27006
27007 /* For a given symbol (function), construct code to compute the address of its
27008 PLT entry in the large x86-64 PIC model. */
27009 static rtx
27010 construct_plt_address (rtx symbol)
27011 {
27012 rtx tmp, unspec;
27013
27014 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27015 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27016 gcc_assert (Pmode == DImode);
27017
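/* The PLT entry address is formed as the PIC (GOT base) register plus
   the symbol's @PLTOFF offset. */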
27018 tmp = gen_reg_rtx (Pmode);
27019 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27020
27021 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27022 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
27023 return tmp;
27024 }
27025
27026 rtx
27027 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27028 rtx callarg2,
27029 rtx pop, bool sibcall)
27030 {
27031 rtx vec[3];
27032 rtx use = NULL, call;
27033 unsigned int vec_len = 0;
27034
27035 if (pop == const0_rtx)
27036 pop = NULL;
27037 gcc_assert (!TARGET_64BIT || !pop);
27038
27039 if (TARGET_MACHO && !TARGET_64BIT)
27040 {
27041 #if TARGET_MACHO
27042 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27043 fnaddr = machopic_indirect_call_target (fnaddr);
27044 #endif
27045 }
27046 else
27047 {
27048 /* Static functions and indirect calls don't need the pic register. Also,
27049 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
27050 it an indirect call. */
27051 rtx addr = XEXP (fnaddr, 0);
27052 if (flag_pic
27053 && GET_CODE (addr) == SYMBOL_REF
27054 && !SYMBOL_REF_LOCAL_P (addr))
27055 {
27056 if (flag_plt
27057 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27058 || !lookup_attribute ("noplt",
27059 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27060 {
27061 if (!TARGET_64BIT
27062 || (ix86_cmodel == CM_LARGE_PIC
27063 && DEFAULT_ABI != MS_ABI))
27064 {
27065 use_reg (&use, gen_rtx_REG (Pmode,
27066 REAL_PIC_OFFSET_TABLE_REGNUM));
27067 if (ix86_use_pseudo_pic_reg ())
27068 emit_move_insn (gen_rtx_REG (Pmode,
27069 REAL_PIC_OFFSET_TABLE_REGNUM),
27070 pic_offset_table_rtx);
27071 }
27072 }
27073 else if (!TARGET_PECOFF && !TARGET_MACHO)
27074 {
27075 if (TARGET_64BIT)
27076 {
27077 fnaddr = gen_rtx_UNSPEC (Pmode,
27078 gen_rtvec (1, addr),
27079 UNSPEC_GOTPCREL);
27080 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27081 }
27082 else
27083 {
27084 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27085 UNSPEC_GOT);
27086 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27087 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27088 fnaddr);
27089 }
27090 fnaddr = gen_const_mem (Pmode, fnaddr);
27091 /* Pmode may not be the same as word_mode for x32, which
27092 doesn't support indirect branch via 32-bit memory slot.
27093 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27094 indirect branch via x32 GOT slot is OK. */
27095 if (GET_MODE (fnaddr) != word_mode)
27096 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27097 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27098 }
27099 }
27100 }
27101
27102 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27103 parameters passed in vector registers. */
27104 if (TARGET_64BIT
27105 && (INTVAL (callarg2) > 0
27106 || (INTVAL (callarg2) == 0
27107 && (TARGET_SSE || !flag_skip_rax_setup))))
27108 {
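/* Per the x86-64 SysV calling convention, AL carries the number of
   vector registers used by a variadic call; CALLARG2 holds that count. */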
27109 rtx al = gen_rtx_REG (QImode, AX_REG);
27110 emit_move_insn (al, callarg2);
27111 use_reg (&use, al);
27112 }
27113
27114 if (ix86_cmodel == CM_LARGE_PIC
27115 && !TARGET_PECOFF
27116 && MEM_P (fnaddr)
27117 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27118 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27119 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27120 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27121 branch via x32 GOT slot is OK. */
27122 else if (!(TARGET_X32
27123 && MEM_P (fnaddr)
27124 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27125 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27126 && (sibcall
27127 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27128 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
27129 {
27130 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27131 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27132 }
27133
27134 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27135
27136 if (retval)
27137 {
27138 /* Add the bound registers as destinations in case a
27139 pointer with bounds may be returned. */
27140 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27141 {
27142 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27143 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27144 if (GET_CODE (retval) == PARALLEL)
27145 {
27146 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27147 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27148 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27149 retval = chkp_join_splitted_slot (retval, par);
27150 }
27151 else
27152 {
27153 retval = gen_rtx_PARALLEL (VOIDmode,
27154 gen_rtvec (3, retval, b0, b1));
27155 chkp_put_regs_to_expr_list (retval);
27156 }
27157 }
27158
27159 call = gen_rtx_SET (retval, call);
27160 }
27161 vec[vec_len++] = call;
27162
27163 if (pop)
27164 {
27165 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27166 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27167 vec[vec_len++] = pop;
27168 }
27169
27170 if (TARGET_64BIT_MS_ABI
27171 && (!callarg2 || INTVAL (callarg2) != -2))
27172 {
27173 int const cregs_size
27174 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
27175 int i;
27176
27177 for (i = 0; i < cregs_size; i++)
27178 {
27179 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27180 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27181
27182 clobber_reg (&use, gen_rtx_REG (mode, regno));
27183 }
27184 }
27185
27186 if (vec_len > 1)
27187 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27188 call = emit_call_insn (call);
27189 if (use)
27190 CALL_INSN_FUNCTION_USAGE (call) = use;
27191
27192 return call;
27193 }
27194
27195 /* Return true if the function being called was marked with attribute "noplt"
27196 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
27197 handle the non-PIC case in the backend because there is no easy interface
27198 for the front-end to force non-PLT calls to use the GOT. This is currently
27199 used only with 64-bit ELF targets to call the function marked "noplt"
27200 indirectly. */
27201
27202 static bool
27203 ix86_nopic_noplt_attribute_p (rtx call_op)
27204 {
27205 if (flag_pic || ix86_cmodel == CM_LARGE
27206 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27207 || SYMBOL_REF_LOCAL_P (call_op))
27208 return false;
27209
27210 tree symbol_decl = SYMBOL_REF_DECL (call_op);
27211
27212 if (!flag_plt
27213 || (symbol_decl != NULL_TREE
27214 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27215 return true;
27216
27217 return false;
27218 }
27219
27220 /* Output the assembly for a call instruction. */
27221
27222 const char *
27223 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
27224 {
27225 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
27226 bool seh_nop_p = false;
27227 const char *xasm;
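/* Note: "%!" in the templates below emits the "bnd " prefix when MPX
   branch instrumentation requires it (handled by ix86_print_operand). */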
27228
27229 if (SIBLING_CALL_P (insn))
27230 {
27231 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
27232 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
27233 else if (direct_p)
27234 xasm = "%!jmp\t%P0";
27235 /* SEH epilogue detection requires the indirect branch case
27236 to include REX.W. */
27237 else if (TARGET_SEH)
27238 xasm = "%!rex.W jmp %A0";
27239 else
27240 xasm = "%!jmp\t%A0";
27241
27242 output_asm_insn (xasm, &call_op);
27243 return "";
27244 }
27245
27246 /* SEH unwinding can require an extra nop to be emitted in several
27247 circumstances. Determine if we have one of those. */
27248 if (TARGET_SEH)
27249 {
27250 rtx_insn *i;
27251
27252 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
27253 {
27254 /* If we get to another real insn, we don't need the nop. */
27255 if (INSN_P (i))
27256 break;
27257
27258 /* If we get to the epilogue note, prevent a catch region from
27259 being adjacent to the standard epilogue sequence. If non-
27260 call-exceptions, we'll have done this during epilogue emission. */
27261 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
27262 && !flag_non_call_exceptions
27263 && !can_throw_internal (insn))
27264 {
27265 seh_nop_p = true;
27266 break;
27267 }
27268 }
27269
27270 /* If we didn't find a real insn following the call, prevent the
27271 unwinder from looking into the next function. */
27272 if (i == NULL)
27273 seh_nop_p = true;
27274 }
27275
27276 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
27277 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
27278 else if (direct_p)
27279 xasm = "%!call\t%P0";
27280 else
27281 xasm = "%!call\t%A0";
27282
27283 output_asm_insn (xasm, &call_op);
27284
27285 if (seh_nop_p)
27286 return "nop";
27287
27288 return "";
27289 }
27290 \f
27291 /* Clear stack slot assignments remembered from previous functions.
27292 This is called from INIT_EXPANDERS once before RTL is emitted for each
27293 function. */
27294
27295 static struct machine_function *
27296 ix86_init_machine_status (void)
27297 {
27298 struct machine_function *f;
27299
27300 f = ggc_cleared_alloc<machine_function> ();
27301 f->use_fast_prologue_epilogue_nregs = -1;
27302 f->call_abi = ix86_abi;
27303
27304 return f;
27305 }
27306
27307 /* Return a MEM corresponding to a stack slot with mode MODE.
27308 Allocate a new slot if necessary.
27309
27310 The RTL for a function can have several slots available: N is
27311 which slot to use. */
27312
27313 rtx
27314 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
27315 {
27316 struct stack_local_entry *s;
27317
27318 gcc_assert (n < MAX_386_STACK_LOCALS);
27319
27320 for (s = ix86_stack_locals; s; s = s->next)
27321 if (s->mode == mode && s->n == n)
27322 return validize_mem (copy_rtx (s->rtl));
27323
27324 s = ggc_alloc<stack_local_entry> ();
27325 s->n = n;
27326 s->mode = mode;
27327 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
27328
27329 s->next = ix86_stack_locals;
27330 ix86_stack_locals = s;
27331 return validize_mem (copy_rtx (s->rtl));
27332 }
27333
27334 static void
27335 ix86_instantiate_decls (void)
27336 {
27337 struct stack_local_entry *s;
27338
27339 for (s = ix86_stack_locals; s; s = s->next)
27340 if (s->rtl != NULL_RTX)
27341 instantiate_decl_rtl (s->rtl);
27342 }
27343 \f
27344 /* Return the number used for encoding REG, in the range 0..7, or -1 if REG is not one we handle. */
27345
27346 static int
27347 reg_encoded_number (rtx reg)
27348 {
27349 unsigned regno = REGNO (reg);
27350 switch (regno)
27351 {
27352 case AX_REG:
27353 return 0;
27354 case CX_REG:
27355 return 1;
27356 case DX_REG:
27357 return 2;
27358 case BX_REG:
27359 return 3;
27360 case SP_REG:
27361 return 4;
27362 case BP_REG:
27363 return 5;
27364 case SI_REG:
27365 return 6;
27366 case DI_REG:
27367 return 7;
27368 default:
27369 break;
27370 }
27371 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
27372 return regno - FIRST_STACK_REG;
27373 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
27374 return regno - FIRST_SSE_REG;
27375 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
27376 return regno - FIRST_MMX_REG;
27377 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
27378 return regno - FIRST_REX_SSE_REG;
27379 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
27380 return regno - FIRST_REX_INT_REG;
27381 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
27382 return regno - FIRST_MASK_REG;
27383 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
27384 return regno - FIRST_BND_REG;
27385 return -1;
27386 }
27387
27388 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
27389 in its encoding if it could be relevant for ROP mitigation, otherwise
27390 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
27391 used for calculating it into them. */
27392
27393 static int
27394 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
27395 int *popno0 = 0, int *popno1 = 0)
27396 {
27397 if (asm_noperands (PATTERN (insn)) >= 0)
27398 return -1;
27399 int has_modrm = get_attr_modrm (insn);
27400 if (!has_modrm)
27401 return -1;
27402 enum attr_modrm_class cls = get_attr_modrm_class (insn);
27403 rtx op0, op1;
27404 switch (cls)
27405 {
27406 case MODRM_CLASS_OP02:
27407 gcc_assert (noperands >= 3);
27408 if (popno0)
27409 {
27410 *popno0 = 0;
27411 *popno1 = 2;
27412 }
27413 op0 = operands[0];
27414 op1 = operands[2];
27415 break;
27416 case MODRM_CLASS_OP01:
27417 gcc_assert (noperands >= 2);
27418 if (popno0)
27419 {
27420 *popno0 = 0;
27421 *popno1 = 1;
27422 }
27423 op0 = operands[0];
27424 op1 = operands[1];
27425 break;
27426 default:
27427 return -1;
27428 }
27429 if (REG_P (op0) && REG_P (op1))
27430 {
27431 int enc0 = reg_encoded_number (op0);
27432 int enc1 = reg_encoded_number (op1);
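/* Register-direct ModRM byte: mod = 11, reg field = ENC1, r/m field = ENC0. */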
27433 return 0xc0 + (enc1 << 3) + enc0;
27434 }
27435 return -1;
27436 }
27437
27438 /* Check whether x86 address PARTS is a pc-relative address. */
27439
27440 static bool
27441 rip_relative_addr_p (struct ix86_address *parts)
27442 {
27443 rtx base, index, disp;
27444
27445 base = parts->base;
27446 index = parts->index;
27447 disp = parts->disp;
27448
27449 if (disp && !base && !index)
27450 {
27451 if (TARGET_64BIT)
27452 {
27453 rtx symbol = disp;
27454
27455 if (GET_CODE (disp) == CONST)
27456 symbol = XEXP (disp, 0);
27457 if (GET_CODE (symbol) == PLUS
27458 && CONST_INT_P (XEXP (symbol, 1)))
27459 symbol = XEXP (symbol, 0);
27460
27461 if (GET_CODE (symbol) == LABEL_REF
27462 || (GET_CODE (symbol) == SYMBOL_REF
27463 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
27464 || (GET_CODE (symbol) == UNSPEC
27465 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
27466 || XINT (symbol, 1) == UNSPEC_PCREL
27467 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
27468 return true;
27469 }
27470 }
27471 return false;
27472 }
27473
27474 /* Calculate the length of the memory address in the instruction encoding.
27475 Includes addr32 prefix, does not include the one-byte modrm, opcode,
27476 or other prefixes. We never generate addr32 prefix for LEA insn. */
27477
27478 int
27479 memory_address_length (rtx addr, bool lea)
27480 {
27481 struct ix86_address parts;
27482 rtx base, index, disp;
27483 int len;
27484 int ok;
27485
27486 if (GET_CODE (addr) == PRE_DEC
27487 || GET_CODE (addr) == POST_INC
27488 || GET_CODE (addr) == PRE_MODIFY
27489 || GET_CODE (addr) == POST_MODIFY)
27490 return 0;
27491
27492 ok = ix86_decompose_address (addr, &parts);
27493 gcc_assert (ok);
27494
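/* Count one byte for the segment override prefix when the address uses a
   non-generic address space. */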
27495 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
27496
27497 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
27498 if (TARGET_64BIT && !lea
27499 && (SImode_address_operand (addr, VOIDmode)
27500 || (parts.base && GET_MODE (parts.base) == SImode)
27501 || (parts.index && GET_MODE (parts.index) == SImode)))
27502 len++;
27503
27504 base = parts.base;
27505 index = parts.index;
27506 disp = parts.disp;
27507
27508 if (base && SUBREG_P (base))
27509 base = SUBREG_REG (base);
27510 if (index && SUBREG_P (index))
27511 index = SUBREG_REG (index);
27512
27513 gcc_assert (base == NULL_RTX || REG_P (base));
27514 gcc_assert (index == NULL_RTX || REG_P (index));
27515
27516 /* Rule of thumb:
27517 - esp as the base always wants an index,
27518 - ebp as the base always wants a displacement,
27519 - r12 as the base always wants an index,
27520 - r13 as the base always wants a displacement. */
27521
27522 /* Register Indirect. */
27523 if (base && !index && !disp)
27524 {
27525 /* esp (for its index) and ebp (for its displacement) need
27526 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
27527 code. */
27528 if (base == arg_pointer_rtx
27529 || base == frame_pointer_rtx
27530 || REGNO (base) == SP_REG
27531 || REGNO (base) == BP_REG
27532 || REGNO (base) == R12_REG
27533 || REGNO (base) == R13_REG)
27534 len++;
27535 }
27536
27537 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
27538 is not disp32, but disp32(%rip), so for disp32
27539 SIB byte is needed, unless print_operand_address
27540 optimizes it into disp32(%rip) or (%rip) is implied
27541 by UNSPEC. */
27542 else if (disp && !base && !index)
27543 {
27544 len += 4;
27545 if (rip_relative_addr_p (&parts))
27546 len++;
27547 }
27548 else
27549 {
27550 /* Find the length of the displacement constant. */
27551 if (disp)
27552 {
27553 if (base && satisfies_constraint_K (disp))
27554 len += 1;
27555 else
27556 len += 4;
27557 }
27558 /* ebp always wants a displacement. Similarly r13. */
27559 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
27560 len++;
27561
27562 /* An index requires the two-byte modrm form.... */
27563 if (index
27564 /* ...like esp (or r12), which always wants an index. */
27565 || base == arg_pointer_rtx
27566 || base == frame_pointer_rtx
27567 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
27568 len++;
27569 }
27570
27571 return len;
27572 }
27573
27574 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
27575 is set, expect that the insn has an 8-bit immediate alternative. */
27576 int
27577 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
27578 {
27579 int len = 0;
27580 int i;
27581 extract_insn_cached (insn);
27582 for (i = recog_data.n_operands - 1; i >= 0; --i)
27583 if (CONSTANT_P (recog_data.operand[i]))
27584 {
27585 enum attr_mode mode = get_attr_mode (insn);
27586
27587 gcc_assert (!len);
27588 if (shortform && CONST_INT_P (recog_data.operand[i]))
27589 {
27590 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
27591 switch (mode)
27592 {
27593 case MODE_QI:
27594 len = 1;
27595 continue;
27596 case MODE_HI:
27597 ival = trunc_int_for_mode (ival, HImode);
27598 break;
27599 case MODE_SI:
27600 ival = trunc_int_for_mode (ival, SImode);
27601 break;
27602 default:
27603 break;
27604 }
27605 if (IN_RANGE (ival, -128, 127))
27606 {
27607 len = 1;
27608 continue;
27609 }
27610 }
27611 switch (mode)
27612 {
27613 case MODE_QI:
27614 len = 1;
27615 break;
27616 case MODE_HI:
27617 len = 2;
27618 break;
27619 case MODE_SI:
27620 len = 4;
27621 break;
27622 /* Immediates for DImode instructions are encoded
27623 as 32bit sign extended values. */
27624 case MODE_DI:
27625 len = 4;
27626 break;
27627 default:
27628 fatal_insn ("unknown insn mode", insn);
27629 }
27630 }
27631 return len;
27632 }
27633
27634 /* Compute default value for "length_address" attribute. */
27635 int
27636 ix86_attr_length_address_default (rtx_insn *insn)
27637 {
27638 int i;
27639
27640 if (get_attr_type (insn) == TYPE_LEA)
27641 {
27642 rtx set = PATTERN (insn), addr;
27643
27644 if (GET_CODE (set) == PARALLEL)
27645 set = XVECEXP (set, 0, 0);
27646
27647 gcc_assert (GET_CODE (set) == SET);
27648
27649 addr = SET_SRC (set);
27650
27651 return memory_address_length (addr, true);
27652 }
27653
27654 extract_insn_cached (insn);
27655 for (i = recog_data.n_operands - 1; i >= 0; --i)
27656 {
27657 rtx op = recog_data.operand[i];
27658 if (MEM_P (op))
27659 {
27660 constrain_operands_cached (insn, reload_completed);
27661 if (which_alternative != -1)
27662 {
27663 const char *constraints = recog_data.constraints[i];
27664 int alt = which_alternative;
27665
27666 while (*constraints == '=' || *constraints == '+')
27667 constraints++;
27668 while (alt-- > 0)
27669 while (*constraints++ != ',')
27670 ;
27671 /* Skip ignored operands. */
27672 if (*constraints == 'X')
27673 continue;
27674 }
27675
27676 int len = memory_address_length (XEXP (op, 0), false);
27677
27678 /* Account for segment prefix for non-default addr spaces. */
27679 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
27680 len++;
27681
27682 return len;
27683 }
27684 }
27685 return 0;
27686 }
27687
27688 /* Compute the default value for the "length_vex" attribute. It includes
27689 the 2- or 3-byte VEX prefix and 1 opcode byte. */
27690
27691 int
27692 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
27693 bool has_vex_w)
27694 {
27695 int i;
27696
27697 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit requires
27698 the 3-byte VEX prefix. */
27699 if (!has_0f_opcode || has_vex_w)
27700 return 3 + 1;
27701
27702 /* We can always use 2 byte VEX prefix in 32bit. */
27703 if (!TARGET_64BIT)
27704 return 2 + 1;
27705
27706 extract_insn_cached (insn);
27707
27708 for (i = recog_data.n_operands - 1; i >= 0; --i)
27709 if (REG_P (recog_data.operand[i]))
27710 {
27711 /* REX.W bit uses 3 byte VEX prefix. */
27712 if (GET_MODE (recog_data.operand[i]) == DImode
27713 && GENERAL_REG_P (recog_data.operand[i]))
27714 return 3 + 1;
27715 }
27716 else
27717 {
27718 /* REX.X or REX.B bits use 3 byte VEX prefix. */
27719 if (MEM_P (recog_data.operand[i])
27720 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
27721 return 3 + 1;
27722 }
27723
27724 return 2 + 1;
27725 }
27726 \f
27727 /* Return the maximum number of instructions a cpu can issue. */
27728
27729 static int
27730 ix86_issue_rate (void)
27731 {
27732 switch (ix86_tune)
27733 {
27734 case PROCESSOR_PENTIUM:
27735 case PROCESSOR_LAKEMONT:
27736 case PROCESSOR_BONNELL:
27737 case PROCESSOR_SILVERMONT:
27738 case PROCESSOR_KNL:
27739 case PROCESSOR_INTEL:
27740 case PROCESSOR_K6:
27741 case PROCESSOR_BTVER2:
27742 case PROCESSOR_PENTIUM4:
27743 case PROCESSOR_NOCONA:
27744 return 2;
27745
27746 case PROCESSOR_PENTIUMPRO:
27747 case PROCESSOR_ATHLON:
27748 case PROCESSOR_K8:
27749 case PROCESSOR_AMDFAM10:
27750 case PROCESSOR_GENERIC:
27751 case PROCESSOR_BTVER1:
27752 return 3;
27753
27754 case PROCESSOR_BDVER1:
27755 case PROCESSOR_BDVER2:
27756 case PROCESSOR_BDVER3:
27757 case PROCESSOR_BDVER4:
27758 case PROCESSOR_ZNVER1:
27759 case PROCESSOR_CORE2:
27760 case PROCESSOR_NEHALEM:
27761 case PROCESSOR_SANDYBRIDGE:
27762 case PROCESSOR_HASWELL:
27763 return 4;
27764
27765 default:
27766 return 1;
27767 }
27768 }
27769
27770 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
27771 by DEP_INSN and nothing else set by DEP_INSN. */
27772
27773 static bool
27774 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
27775 {
27776 rtx set, set2;
27777
27778 /* Simplify the test for uninteresting insns. */
27779 if (insn_type != TYPE_SETCC
27780 && insn_type != TYPE_ICMOV
27781 && insn_type != TYPE_FCMOV
27782 && insn_type != TYPE_IBR)
27783 return false;
27784
27785 if ((set = single_set (dep_insn)) != 0)
27786 {
27787 set = SET_DEST (set);
27788 set2 = NULL_RTX;
27789 }
27790 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
27791 && XVECLEN (PATTERN (dep_insn), 0) == 2
27792 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
27793 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
27794 {
27795 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
27796 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
27797 }
27798 else
27799 return false;
27800
27801 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
27802 return false;
27803
27804 /* This test is true if the dependent insn reads the flags but
27805 not any other potentially set register. */
27806 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
27807 return false;
27808
27809 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
27810 return false;
27811
27812 return true;
27813 }
27814
27815 /* Return true iff USE_INSN has a memory address with operands set by
27816 SET_INSN. */
27817
27818 bool
27819 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
27820 {
27821 int i;
27822 extract_insn_cached (use_insn);
27823 for (i = recog_data.n_operands - 1; i >= 0; --i)
27824 if (MEM_P (recog_data.operand[i]))
27825 {
27826 rtx addr = XEXP (recog_data.operand[i], 0);
27827 return modified_in_p (addr, set_insn) != 0;
27828 }
27829 return false;
27830 }
27831
27832 /* Helper function for exact_store_load_dependency.
27833 Return true if addr is found in insn. */
27834 static bool
27835 exact_dependency_1 (rtx addr, rtx insn)
27836 {
27837 enum rtx_code code;
27838 const char *format_ptr;
27839 int i, j;
27840
27841 code = GET_CODE (insn);
27842 switch (code)
27843 {
27844 case MEM:
27845 if (rtx_equal_p (addr, insn))
27846 return true;
27847 break;
27848 case REG:
27849 CASE_CONST_ANY:
27850 case SYMBOL_REF:
27851 case CODE_LABEL:
27852 case PC:
27853 case CC0:
27854 case EXPR_LIST:
27855 return false;
27856 default:
27857 break;
27858 }
27859
27860 format_ptr = GET_RTX_FORMAT (code);
27861 for (i = 0; i < GET_RTX_LENGTH (code); i++)
27862 {
27863 switch (*format_ptr++)
27864 {
27865 case 'e':
27866 if (exact_dependency_1 (addr, XEXP (insn, i)))
27867 return true;
27868 break;
27869 case 'E':
27870 for (j = 0; j < XVECLEN (insn, i); j++)
27871 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
27872 return true;
27873 break;
27874 }
27875 }
27876 return false;
27877 }
27878
27879 /* Return true if there exists exact dependency for store & load, i.e.
27880 the same memory address is used in them. */
27881 static bool
27882 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
27883 {
27884 rtx set1, set2;
27885
27886 set1 = single_set (store);
27887 if (!set1)
27888 return false;
27889 if (!MEM_P (SET_DEST (set1)))
27890 return false;
27891 set2 = single_set (load);
27892 if (!set2)
27893 return false;
27894 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
27895 return true;
27896 return false;
27897 }
27898
27899 static int
27900 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27901 {
27902 enum attr_type insn_type, dep_insn_type;
27903 enum attr_memory memory;
27904 rtx set, set2;
27905 int dep_insn_code_number;
27906
27907 /* Anti and output dependencies have zero cost on all CPUs. */
27908 if (REG_NOTE_KIND (link) != 0)
27909 return 0;
27910
27911 dep_insn_code_number = recog_memoized (dep_insn);
27912
27913 /* If we can't recognize the insns, we can't really do anything. */
27914 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
27915 return cost;
27916
27917 insn_type = get_attr_type (insn);
27918 dep_insn_type = get_attr_type (dep_insn);
27919
27920 switch (ix86_tune)
27921 {
27922 case PROCESSOR_PENTIUM:
27923 case PROCESSOR_LAKEMONT:
27924 /* Address Generation Interlock adds a cycle of latency. */
27925 if (insn_type == TYPE_LEA)
27926 {
27927 rtx addr = PATTERN (insn);
27928
27929 if (GET_CODE (addr) == PARALLEL)
27930 addr = XVECEXP (addr, 0, 0);
27931
27932 gcc_assert (GET_CODE (addr) == SET);
27933
27934 addr = SET_SRC (addr);
27935 if (modified_in_p (addr, dep_insn))
27936 cost += 1;
27937 }
27938 else if (ix86_agi_dependent (dep_insn, insn))
27939 cost += 1;
27940
27941 /* ??? Compares pair with jump/setcc. */
27942 if (ix86_flags_dependent (insn, dep_insn, insn_type))
27943 cost = 0;
27944
27945 /* Floating point stores require value to be ready one cycle earlier. */
27946 if (insn_type == TYPE_FMOV
27947 && get_attr_memory (insn) == MEMORY_STORE
27948 && !ix86_agi_dependent (dep_insn, insn))
27949 cost += 1;
27950 break;
27951
27952 case PROCESSOR_PENTIUMPRO:
27953 /* INT->FP conversion is expensive. */
27954 if (get_attr_fp_int_src (dep_insn))
27955 cost += 5;
27956
27957 /* There is one cycle extra latency between an FP op and a store. */
27958 if (insn_type == TYPE_FMOV
27959 && (set = single_set (dep_insn)) != NULL_RTX
27960 && (set2 = single_set (insn)) != NULL_RTX
27961 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
27962 && MEM_P (SET_DEST (set2)))
27963 cost += 1;
27964
27965 memory = get_attr_memory (insn);
27966
27967 /* Show ability of reorder buffer to hide latency of load by executing
27968 in parallel with previous instruction in case
27969 previous instruction is not needed to compute the address. */
27970 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
27971 && !ix86_agi_dependent (dep_insn, insn))
27972 {
27973 /* Claim moves to take one cycle, as the core can issue one load
27974 at a time and the next load can start a cycle later. */
27975 if (dep_insn_type == TYPE_IMOV
27976 || dep_insn_type == TYPE_FMOV)
27977 cost = 1;
27978 else if (cost > 1)
27979 cost--;
27980 }
27981 break;
27982
27983 case PROCESSOR_K6:
27984 /* The esp dependency is resolved before
27985 the instruction is really finished. */
27986 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
27987 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
27988 return 1;
27989
27990 /* INT->FP conversion is expensive. */
27991 if (get_attr_fp_int_src (dep_insn))
27992 cost += 5;
27993
27994 memory = get_attr_memory (insn);
27995
27996 /* Show ability of reorder buffer to hide latency of load by executing
27997 in parallel with previous instruction in case
27998 previous instruction is not needed to compute the address. */
27999 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28000 && !ix86_agi_dependent (dep_insn, insn))
28001 {
28002 /* Claim moves to take one cycle, as the core can issue one load
28003 at a time and the next load can start a cycle later. */
28004 if (dep_insn_type == TYPE_IMOV
28005 || dep_insn_type == TYPE_FMOV)
28006 cost = 1;
28007 else if (cost > 2)
28008 cost -= 2;
28009 else
28010 cost = 1;
28011 }
28012 break;
28013
28014 case PROCESSOR_AMDFAM10:
28015 case PROCESSOR_BDVER1:
28016 case PROCESSOR_BDVER2:
28017 case PROCESSOR_BDVER3:
28018 case PROCESSOR_BDVER4:
28019 case PROCESSOR_ZNVER1:
28020 case PROCESSOR_BTVER1:
28021 case PROCESSOR_BTVER2:
28022 case PROCESSOR_GENERIC:
28023 /* The stack engine allows push and pop instructions to execute in parallel. */
28024 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28025 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28026 return 0;
28027 /* FALLTHRU */
28028
28029 case PROCESSOR_ATHLON:
28030 case PROCESSOR_K8:
28031 memory = get_attr_memory (insn);
28032
28033 /* Show ability of reorder buffer to hide latency of load by executing
28034 in parallel with previous instruction in case
28035 previous instruction is not needed to compute the address. */
28036 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28037 && !ix86_agi_dependent (dep_insn, insn))
28038 {
28039 enum attr_unit unit = get_attr_unit (insn);
28040 int loadcost = 3;
28041
28042 /* Because of the difference between the length of the integer and
28043 floating unit pipeline preparation stages, the memory operands
28044 for floating point are cheaper.
28045
28046 ??? For Athlon the difference is most probably 2. */
28047 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
28048 loadcost = 3;
28049 else
28050 loadcost = TARGET_ATHLON ? 2 : 0;
28051
28052 if (cost >= loadcost)
28053 cost -= loadcost;
28054 else
28055 cost = 0;
28056 }
28057 break;
28058
28059 case PROCESSOR_CORE2:
28060 case PROCESSOR_NEHALEM:
28061 case PROCESSOR_SANDYBRIDGE:
28062 case PROCESSOR_HASWELL:
28063 /* The stack engine allows push and pop instructions to execute in parallel. */
28064 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28065 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28066 return 0;
28067
28068 memory = get_attr_memory (insn);
28069
28070 /* Show ability of reorder buffer to hide latency of load by executing
28071 in parallel with previous instruction in case
28072 previous instruction is not needed to compute the address. */
28073 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28074 && !ix86_agi_dependent (dep_insn, insn))
28075 {
28076 if (cost >= 4)
28077 cost -= 4;
28078 else
28079 cost = 0;
28080 }
28081 break;
28082
28083 case PROCESSOR_SILVERMONT:
28084 case PROCESSOR_KNL:
28085 case PROCESSOR_INTEL:
28086 if (!reload_completed)
28087 return cost;
28088
28089 /* Increase cost of integer loads. */
28090 memory = get_attr_memory (dep_insn);
28091 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28092 {
28093 enum attr_unit unit = get_attr_unit (dep_insn);
28094 if (unit == UNIT_INTEGER && cost == 1)
28095 {
28096 if (memory == MEMORY_LOAD)
28097 cost = 3;
28098 else
28099 {
28100 /* Increase cost of ld/st for short int types only
28101 because of store forwarding issue. */
28102 rtx set = single_set (dep_insn);
28103 if (set && (GET_MODE (SET_DEST (set)) == QImode
28104 || GET_MODE (SET_DEST (set)) == HImode))
28105 {
28106 /* Increase cost of store/load insn if exact
28107 dependence exists and it is load insn. */
28108 enum attr_memory insn_memory = get_attr_memory (insn);
28109 if (insn_memory == MEMORY_LOAD
28110 && exact_store_load_dependency (dep_insn, insn))
28111 cost = 3;
28112 }
28113 }
28114 }
28115 }
28116
28117 default:
28118 break;
28119 }
28120
28121 return cost;
28122 }
28123
28124 /* How many alternative schedules to try. This should be as wide as the
28125 scheduling freedom in the DFA, but no wider. Making this value too
28126 large results in extra work for the scheduler. */
28127
28128 static int
28129 ia32_multipass_dfa_lookahead (void)
28130 {
28131 switch (ix86_tune)
28132 {
28133 case PROCESSOR_PENTIUM:
28134 case PROCESSOR_LAKEMONT:
28135 return 2;
28136
28137 case PROCESSOR_PENTIUMPRO:
28138 case PROCESSOR_K6:
28139 return 1;
28140
28141 case PROCESSOR_BDVER1:
28142 case PROCESSOR_BDVER2:
28143 case PROCESSOR_BDVER3:
28144 case PROCESSOR_BDVER4:
28145 /* We use lookahead value 4 for BD both before and after reload
28146 schedules. The plan is to use value 8 for -O3. */
28147 return 4;
28148
28149 case PROCESSOR_CORE2:
28150 case PROCESSOR_NEHALEM:
28151 case PROCESSOR_SANDYBRIDGE:
28152 case PROCESSOR_HASWELL:
28153 case PROCESSOR_BONNELL:
28154 case PROCESSOR_SILVERMONT:
28155 case PROCESSOR_KNL:
28156 case PROCESSOR_INTEL:
28157 /* Generally, we want haifa-sched:max_issue() to look ahead as far
28158 as many instructions can be executed on a cycle, i.e.,
28159 issue_rate. I wonder why tuning for many CPUs does not do this. */
28160 if (reload_completed)
28161 return ix86_issue_rate ();
28162 /* Don't use lookahead for pre-reload schedule to save compile time. */
28163 return 0;
28164
28165 default:
28166 return 0;
28167 }
28168 }
28169
28170 /* Return true if target platform supports macro-fusion. */
28171
28172 static bool
28173 ix86_macro_fusion_p ()
28174 {
28175 return TARGET_FUSE_CMP_AND_BRANCH;
28176 }
28177
28178 /* Check whether the current microarchitecture supports macro fusion
28179 for insn pair "CONDGEN + CONDJMP". Refer to
28180 "Intel Architectures Optimization Reference Manual". */
28181
28182 static bool
28183 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
28184 {
28185 rtx src, dest;
28186 enum rtx_code ccode;
28187 rtx compare_set = NULL_RTX, test_if, cond;
28188 rtx alu_set = NULL_RTX, addr = NULL_RTX;
28189
28190 if (!any_condjump_p (condjmp))
28191 return false;
28192
28193 if (get_attr_type (condgen) != TYPE_TEST
28194 && get_attr_type (condgen) != TYPE_ICMP
28195 && get_attr_type (condgen) != TYPE_INCDEC
28196 && get_attr_type (condgen) != TYPE_ALU)
28197 return false;
28198
28199 compare_set = single_set (condgen);
28200 if (compare_set == NULL_RTX
28201 && !TARGET_FUSE_ALU_AND_BRANCH)
28202 return false;
28203
28204 if (compare_set == NULL_RTX)
28205 {
28206 int i;
28207 rtx pat = PATTERN (condgen);
28208 for (i = 0; i < XVECLEN (pat, 0); i++)
28209 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28210 {
28211 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
28212 if (GET_CODE (set_src) == COMPARE)
28213 compare_set = XVECEXP (pat, 0, i);
28214 else
28215 alu_set = XVECEXP (pat, 0, i);
28216 }
28217 }
28218 if (compare_set == NULL_RTX)
28219 return false;
28220 src = SET_SRC (compare_set);
28221 if (GET_CODE (src) != COMPARE)
28222 return false;
28223
28224 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
28225 supported. */
28226 if ((MEM_P (XEXP (src, 0))
28227 && CONST_INT_P (XEXP (src, 1)))
28228 || (MEM_P (XEXP (src, 1))
28229 && CONST_INT_P (XEXP (src, 0))))
28230 return false;
28231
28232 /* No fusion for RIP-relative address. */
28233 if (MEM_P (XEXP (src, 0)))
28234 addr = XEXP (XEXP (src, 0), 0);
28235 else if (MEM_P (XEXP (src, 1)))
28236 addr = XEXP (XEXP (src, 1), 0);
28237
28238 if (addr) {
28239 ix86_address parts;
28240 int ok = ix86_decompose_address (addr, &parts);
28241 gcc_assert (ok);
28242
28243 if (rip_relative_addr_p (&parts))
28244 return false;
28245 }
28246
28247 test_if = SET_SRC (pc_set (condjmp));
28248 cond = XEXP (test_if, 0);
28249 ccode = GET_CODE (cond);
28250 /* Check whether the conditional jump uses the Sign or Overflow flags. */
28251 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
28252 && (ccode == GE
28253 || ccode == GT
28254 || ccode == LE
28255 || ccode == LT))
28256 return false;
28257
28258 /* Return true for TYPE_TEST and TYPE_ICMP. */
28259 if (get_attr_type (condgen) == TYPE_TEST
28260 || get_attr_type (condgen) == TYPE_ICMP)
28261 return true;
28262
28263 /* The following handles the case of macro-fusion for alu + jmp. */
28264 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
28265 return false;
28266
28267 /* No fusion for alu op with memory destination operand. */
28268 dest = SET_DEST (alu_set);
28269 if (MEM_P (dest))
28270 return false;
28271
28272 /* Macro-fusion for inc/dec + unsigned conditional jump is not
28273 supported. */
28274 if (get_attr_type (condgen) == TYPE_INCDEC
28275 && (ccode == GEU
28276 || ccode == GTU
28277 || ccode == LEU
28278 || ccode == LTU))
28279 return false;
28280
28281 return true;
28282 }
28283
28284 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
28285 execution. It is applied if
28286 (1) an IMUL instruction is at the top of the list;
28287 (2) there is exactly one producer of an independent IMUL instruction in
28288 the ready list.
28289 Return the index of the IMUL producer if it was found and -1 otherwise. */
28290 static int
28291 do_reorder_for_imul (rtx_insn **ready, int n_ready)
28292 {
28293 rtx_insn *insn;
28294 rtx set, insn1, insn2;
28295 sd_iterator_def sd_it;
28296 dep_t dep;
28297 int index = -1;
28298 int i;
28299
28300 if (!TARGET_BONNELL)
28301 return index;
28302
28303 /* Check that IMUL instruction is on the top of ready list. */
28304 insn = ready[n_ready - 1];
28305 set = single_set (insn);
28306 if (!set)
28307 return index;
28308 if (!(GET_CODE (SET_SRC (set)) == MULT
28309 && GET_MODE (SET_SRC (set)) == SImode))
28310 return index;
28311
28312 /* Search for producer of independent IMUL instruction. */
28313 for (i = n_ready - 2; i >= 0; i--)
28314 {
28315 insn = ready[i];
28316 if (!NONDEBUG_INSN_P (insn))
28317 continue;
28318 /* Skip IMUL instruction. */
28319 insn2 = PATTERN (insn);
28320 if (GET_CODE (insn2) == PARALLEL)
28321 insn2 = XVECEXP (insn2, 0, 0);
28322 if (GET_CODE (insn2) == SET
28323 && GET_CODE (SET_SRC (insn2)) == MULT
28324 && GET_MODE (SET_SRC (insn2)) == SImode)
28325 continue;
28326
28327 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
28328 {
28329 rtx con;
28330 con = DEP_CON (dep);
28331 if (!NONDEBUG_INSN_P (con))
28332 continue;
28333 insn1 = PATTERN (con);
28334 if (GET_CODE (insn1) == PARALLEL)
28335 insn1 = XVECEXP (insn1, 0, 0);
28336
28337 if (GET_CODE (insn1) == SET
28338 && GET_CODE (SET_SRC (insn1)) == MULT
28339 && GET_MODE (SET_SRC (insn1)) == SImode)
28340 {
28341 sd_iterator_def sd_it1;
28342 dep_t dep1;
28343 /* Check if there is no other dependee for IMUL. */
28344 index = i;
28345 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
28346 {
28347 rtx pro;
28348 pro = DEP_PRO (dep1);
28349 if (!NONDEBUG_INSN_P (pro))
28350 continue;
28351 if (pro != insn)
28352 index = -1;
28353 }
28354 if (index >= 0)
28355 break;
28356 }
28357 }
28358 if (index >= 0)
28359 break;
28360 }
28361 return index;
28362 }
28363
28364 /* Try to find the best candidate at the top of the ready list if two insns
28365 have the same priority - the better candidate is the one whose dependees
28366 were scheduled earlier. Applied to Silvermont only.
28367 Return true if the top 2 insns must be interchanged. */
28368 static bool
28369 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
28370 {
28371 rtx_insn *top = ready[n_ready - 1];
28372 rtx_insn *next = ready[n_ready - 2];
28373 rtx set;
28374 sd_iterator_def sd_it;
28375 dep_t dep;
28376 int clock1 = -1;
28377 int clock2 = -1;
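/* HID (INSN)->tick is presumably the cycle the haifa scheduler assigned to
   INSN; it is used below to find the latest-finishing producer. */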
28378 #define INSN_TICK(INSN) (HID (INSN)->tick)
28379
28380 if (!TARGET_SILVERMONT && !TARGET_INTEL)
28381 return false;
28382
28383 if (!NONDEBUG_INSN_P (top))
28384 return false;
28385 if (!NONJUMP_INSN_P (top))
28386 return false;
28387 if (!NONDEBUG_INSN_P (next))
28388 return false;
28389 if (!NONJUMP_INSN_P (next))
28390 return false;
28391 set = single_set (top);
28392 if (!set)
28393 return false;
28394 set = single_set (next);
28395 if (!set)
28396 return false;
28397
28398 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
28399 {
28400 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
28401 return false;
28402 /* Determine the winner more precisely. */
28403 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
28404 {
28405 rtx pro;
28406 pro = DEP_PRO (dep);
28407 if (!NONDEBUG_INSN_P (pro))
28408 continue;
28409 if (INSN_TICK (pro) > clock1)
28410 clock1 = INSN_TICK (pro);
28411 }
28412 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
28413 {
28414 rtx pro;
28415 pro = DEP_PRO (dep);
28416 if (!NONDEBUG_INSN_P (pro))
28417 continue;
28418 if (INSN_TICK (pro) > clock2)
28419 clock2 = INSN_TICK (pro);
28420 }
28421
28422 if (clock1 == clock2)
28423 {
28424 /* Determine winner - load must win. */
28425 enum attr_memory memory1, memory2;
28426 memory1 = get_attr_memory (top);
28427 memory2 = get_attr_memory (next);
28428 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
28429 return true;
28430 }
28431 return (bool) (clock2 < clock1);
28432 }
28433 return false;
28434 #undef INSN_TICK
28435 }
28436
28437 /* Perform possible reordering of the ready list for Atom/Silvermont only.
28438 Return issue rate. */
28439 static int
28440 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
28441 int *pn_ready, int clock_var)
28442 {
28443 int issue_rate = -1;
28444 int n_ready = *pn_ready;
28445 int i;
28446 rtx_insn *insn;
28447 int index = -1;
28448
28449 /* Set up issue rate. */
28450 issue_rate = ix86_issue_rate ();
28451
28452 /* Do reordering for BONNELL/SILVERMONT only. */
28453 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
28454 return issue_rate;
28455
28456 /* Nothing to do if ready list contains only 1 instruction. */
28457 if (n_ready <= 1)
28458 return issue_rate;
28459
28460 /* Do reordering for the post-reload scheduler only. */
28461 if (!reload_completed)
28462 return issue_rate;
28463
28464 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
28465 {
28466 if (sched_verbose > 1)
28467 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
28468 INSN_UID (ready[index]));
28469
28470 /* Put IMUL producer (ready[index]) at the top of ready list. */
28471 insn = ready[index];
28472 for (i = index; i < n_ready - 1; i++)
28473 ready[i] = ready[i + 1];
28474 ready[n_ready - 1] = insn;
28475 return issue_rate;
28476 }
28477
28478 /* Skip selective scheduling since HID is not populated in it. */
28479 if (clock_var != 0
28480 && !sel_sched_p ()
28481 && swap_top_of_ready_list (ready, n_ready))
28482 {
28483 if (sched_verbose > 1)
28484 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
28485 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
28486 /* Swap 2 top elements of ready list. */
28487 insn = ready[n_ready - 1];
28488 ready[n_ready - 1] = ready[n_ready - 2];
28489 ready[n_ready - 2] = insn;
28490 }
28491 return issue_rate;
28492 }
28493
28494 static bool
28495 ix86_class_likely_spilled_p (reg_class_t);
28496
28497 /* Return true if the lhs of INSN is a HW function argument register; set
28498 *IS_SPILLED to true if it is a likely-spilled HW register. */
28499 static bool
28500 insn_is_function_arg (rtx insn, bool* is_spilled)
28501 {
28502 rtx dst;
28503
28504 if (!NONDEBUG_INSN_P (insn))
28505 return false;
28506 /* Call instructions are not movable; ignore them. */
28507 if (CALL_P (insn))
28508 return false;
28509 insn = PATTERN (insn);
28510 if (GET_CODE (insn) == PARALLEL)
28511 insn = XVECEXP (insn, 0, 0);
28512 if (GET_CODE (insn) != SET)
28513 return false;
28514 dst = SET_DEST (insn);
28515 if (REG_P (dst) && HARD_REGISTER_P (dst)
28516 && ix86_function_arg_regno_p (REGNO (dst)))
28517 {
28518 /* Is it likely spilled HW register? */
28519 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28520 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28521 *is_spilled = true;
28522 return true;
28523 }
28524 return false;
28525 }
28526
28527 /* Add output dependencies for a chain of adjacent function arguments, but
28528 only if there is a move to a likely-spilled HW register. Return the first
28529 argument if at least one dependence was added, or NULL otherwise. */
28530 static rtx_insn *
28531 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28532 {
28533 rtx_insn *insn;
28534 rtx_insn *last = call;
28535 rtx_insn *first_arg = NULL;
28536 bool is_spilled = false;
28537
28538 head = PREV_INSN (head);
28539
28540 /* Find the argument-passing instruction nearest to the call. */
28541 while (true)
28542 {
28543 last = PREV_INSN (last);
28544 if (last == head)
28545 return NULL;
28546 if (!NONDEBUG_INSN_P (last))
28547 continue;
28548 if (insn_is_function_arg (last, &is_spilled))
28549 break;
28550 return NULL;
28551 }
28552
28553 first_arg = last;
28554 while (true)
28555 {
28556 insn = PREV_INSN (last);
28557 if (!INSN_P (insn))
28558 break;
28559 if (insn == head)
28560 break;
28561 if (!NONDEBUG_INSN_P (insn))
28562 {
28563 last = insn;
28564 continue;
28565 }
28566 if (insn_is_function_arg (insn, &is_spilled))
28567 {
28568 /* Add an output dependence between two function arguments if the chain
28569 of output arguments contains likely-spilled HW registers. */
28570 if (is_spilled)
28571 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28572 first_arg = last = insn;
28573 }
28574 else
28575 break;
28576 }
28577 if (!is_spilled)
28578 return NULL;
28579 return first_arg;
28580 }
28581
28582 /* Add output or anti dependency from insn to first_arg to restrict its code
28583 motion. */
28584 static void
28585 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28586 {
28587 rtx set;
28588 rtx tmp;
28589
28590 /* Add anti dependencies for bounds stores. */
28591 if (INSN_P (insn)
28592 && GET_CODE (PATTERN (insn)) == PARALLEL
28593 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28594 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28595 {
28596 add_dependence (first_arg, insn, REG_DEP_ANTI);
28597 return;
28598 }
28599
28600 set = single_set (insn);
28601 if (!set)
28602 return;
28603 tmp = SET_DEST (set);
28604 if (REG_P (tmp))
28605 {
28606 /* Add output dependency to the first function argument. */
28607 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28608 return;
28609 }
28610 /* Add anti dependency. */
28611 add_dependence (first_arg, insn, REG_DEP_ANTI);
28612 }
28613
28614 /* Avoid cross-block motion of a function argument by adding a dependency
28615 from the first non-jump instruction in BB. */
28616 static void
28617 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28618 {
28619 rtx_insn *insn = BB_END (bb);
28620
28621 while (insn)
28622 {
28623 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28624 {
28625 rtx set = single_set (insn);
28626 if (set)
28627 {
28628 avoid_func_arg_motion (arg, insn);
28629 return;
28630 }
28631 }
28632 if (insn == BB_HEAD (bb))
28633 return;
28634 insn = PREV_INSN (insn);
28635 }
28636 }
28637
28638 /* Hook for pre-reload schedule - avoid motion of function arguments
28639 passed in likely spilled HW registers. */
28640 static void
28641 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28642 {
28643 rtx_insn *insn;
28644 rtx_insn *first_arg = NULL;
28645 if (reload_completed)
28646 return;
28647 while (head != tail && DEBUG_INSN_P (head))
28648 head = NEXT_INSN (head);
28649 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28650 if (INSN_P (insn) && CALL_P (insn))
28651 {
28652 first_arg = add_parameter_dependencies (insn, head);
28653 if (first_arg)
28654 {
28655 /* Add a dependee for the first argument to predecessors, but only if the
28656 region contains more than one block. */
28657 basic_block bb = BLOCK_FOR_INSN (insn);
28658 int rgn = CONTAINING_RGN (bb->index);
28659 int nr_blks = RGN_NR_BLOCKS (rgn);
28660 /* Skip trivial regions and region head blocks that can have
28661 predecessors outside of region. */
28662 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28663 {
28664 edge e;
28665 edge_iterator ei;
28666
28667 /* Regions are SCCs with the exception of selective
28668 scheduling with pipelining of outer blocks enabled.
28669 So also check that immediate predecessors of a non-head
28670 block are in the same region. */
28671 FOR_EACH_EDGE (e, ei, bb->preds)
28672 {
28673 /* Avoid creating loop-carried dependencies by using the
28674 topological ordering of the region. */
28675 if (rgn == CONTAINING_RGN (e->src->index)
28676 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28677 add_dependee_for_func_arg (first_arg, e->src);
28678 }
28679 }
28680 insn = first_arg;
28681 if (insn == head)
28682 break;
28683 }
28684 }
28685 else if (first_arg)
28686 avoid_func_arg_motion (first_arg, insn);
28687 }
28688
28689 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
28690 HW registers to the maximum, to schedule them as soon as possible. These are
28691 moves from function argument registers at the top of the function entry
28692 and moves from function return value registers after a call. */
28693 static int
28694 ix86_adjust_priority (rtx_insn *insn, int priority)
28695 {
28696 rtx set;
28697
28698 if (reload_completed)
28699 return priority;
28700
28701 if (!NONDEBUG_INSN_P (insn))
28702 return priority;
28703
28704 set = single_set (insn);
28705 if (set)
28706 {
28707 rtx tmp = SET_SRC (set);
28708 if (REG_P (tmp)
28709 && HARD_REGISTER_P (tmp)
28710 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28711 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28712 return current_sched_info->sched_max_insns_priority;
28713 }
28714
28715 return priority;
28716 }
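
/* For example (a hypothetical, minimal case): right after a call, a copy
   such as (set (reg:SI 90) (reg:SI 0 ax)) reads the likely-spilled return
   value register %eax, so the hook above bumps its priority to
   sched_max_insns_priority and the pre-reload scheduler issues it as early
   as possible, shortening the live range of %eax.  */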
28717
28718 /* Model the decoder of Core 2/i7.
28719 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
28720 track the instruction fetch block boundaries and make sure that long
28721 (9+ bytes) instructions are assigned to D0. */
28722
28723 /* Maximum length of an insn that can be handled by
28724 a secondary decoder unit. '8' for Core 2/i7. */
28725 static int core2i7_secondary_decoder_max_insn_size;
28726
28727 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
28728 '16' for Core 2/i7. */
28729 static int core2i7_ifetch_block_size;
28730
28731 /* Maximum number of instructions decoder can handle per cycle.
28732 '6' for Core 2/i7. */
28733 static int core2i7_ifetch_block_max_insns;
28734
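/* A worked sketch of the filtering below (numbers chosen for illustration
   only): with core2i7_ifetch_block_size == 16 and 12 bytes already consumed
   in the current ifetch block, a 5-byte insn no longer fits (12 + 5 > 16)
   and is masked out of ready_try, while a 4-byte insn may still issue,
   provided fewer than core2i7_ifetch_block_max_insns insns have been issued
   in the block so far.  */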
28735 typedef struct ix86_first_cycle_multipass_data_ *
28736 ix86_first_cycle_multipass_data_t;
28737 typedef const struct ix86_first_cycle_multipass_data_ *
28738 const_ix86_first_cycle_multipass_data_t;
28739
28740 /* A variable to store target state across calls to max_issue within
28741 one cycle. */
28742 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
28743 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
28744
28745 /* Initialize DATA. */
28746 static void
28747 core2i7_first_cycle_multipass_init (void *_data)
28748 {
28749 ix86_first_cycle_multipass_data_t data
28750 = (ix86_first_cycle_multipass_data_t) _data;
28751
28752 data->ifetch_block_len = 0;
28753 data->ifetch_block_n_insns = 0;
28754 data->ready_try_change = NULL;
28755 data->ready_try_change_size = 0;
28756 }
28757
28758 /* Advancing the cycle; reset ifetch block counts. */
28759 static void
28760 core2i7_dfa_post_advance_cycle (void)
28761 {
28762 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
28763
28764 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28765
28766 data->ifetch_block_len = 0;
28767 data->ifetch_block_n_insns = 0;
28768 }
28769
28770 static int min_insn_size (rtx_insn *);
28771
28772 /* Filter out insns from ready_try that the core will not be able to issue
28773 on the current cycle due to decoder restrictions. */
28774 static void
28775 core2i7_first_cycle_multipass_filter_ready_try
28776 (const_ix86_first_cycle_multipass_data_t data,
28777 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
28778 {
28779 while (n_ready--)
28780 {
28781 rtx_insn *insn;
28782 int insn_size;
28783
28784 if (ready_try[n_ready])
28785 continue;
28786
28787 insn = get_ready_element (n_ready);
28788 insn_size = min_insn_size (insn);
28789
28790 if (/* If this is too long an insn for a secondary decoder ... */
28791 (!first_cycle_insn_p
28792 && insn_size > core2i7_secondary_decoder_max_insn_size)
28793 /* ... or it would not fit into the ifetch block ... */
28794 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
28795 /* ... or the decoder is full already ... */
28796 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
28797 /* ... mask the insn out. */
28798 {
28799 ready_try[n_ready] = 1;
28800
28801 if (data->ready_try_change)
28802 bitmap_set_bit (data->ready_try_change, n_ready);
28803 }
28804 }
28805 }
28806
28807 /* Prepare for a new round of multipass lookahead scheduling. */
28808 static void
28809 core2i7_first_cycle_multipass_begin (void *_data,
28810 signed char *ready_try, int n_ready,
28811 bool first_cycle_insn_p)
28812 {
28813 ix86_first_cycle_multipass_data_t data
28814 = (ix86_first_cycle_multipass_data_t) _data;
28815 const_ix86_first_cycle_multipass_data_t prev_data
28816 = ix86_first_cycle_multipass_data;
28817
28818 /* Restore the state from the end of the previous round. */
28819 data->ifetch_block_len = prev_data->ifetch_block_len;
28820 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
28821
28822 /* Filter instructions that cannot be issued on current cycle due to
28823 decoder restrictions. */
28824 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28825 first_cycle_insn_p);
28826 }
28827
28828 /* INSN is being issued in the current solution. Account for its impact on
28829 the decoder model. */
28830 static void
28831 core2i7_first_cycle_multipass_issue (void *_data,
28832 signed char *ready_try, int n_ready,
28833 rtx_insn *insn, const void *_prev_data)
28834 {
28835 ix86_first_cycle_multipass_data_t data
28836 = (ix86_first_cycle_multipass_data_t) _data;
28837 const_ix86_first_cycle_multipass_data_t prev_data
28838 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
28839
28840 int insn_size = min_insn_size (insn);
28841
28842 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
28843 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
28844 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
28845 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28846
28847 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
28848 if (!data->ready_try_change)
28849 {
28850 data->ready_try_change = sbitmap_alloc (n_ready);
28851 data->ready_try_change_size = n_ready;
28852 }
28853 else if (data->ready_try_change_size < n_ready)
28854 {
28855 data->ready_try_change = sbitmap_resize (data->ready_try_change,
28856 n_ready, 0);
28857 data->ready_try_change_size = n_ready;
28858 }
28859 bitmap_clear (data->ready_try_change);
28860
28861 /* Filter out insns from ready_try that the core will not be able to issue
28862 on the current cycle due to decoder restrictions. */
28863 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28864 false);
28865 }
28866
28867 /* Revert the effect on ready_try. */
28868 static void
28869 core2i7_first_cycle_multipass_backtrack (const void *_data,
28870 signed char *ready_try,
28871 int n_ready ATTRIBUTE_UNUSED)
28872 {
28873 const_ix86_first_cycle_multipass_data_t data
28874 = (const_ix86_first_cycle_multipass_data_t) _data;
28875 unsigned int i = 0;
28876 sbitmap_iterator sbi;
28877
28878 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
28879 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
28880 {
28881 ready_try[i] = 0;
28882 }
28883 }
28884
28885 /* Save the result of multipass lookahead scheduling for the next round. */
28886 static void
28887 core2i7_first_cycle_multipass_end (const void *_data)
28888 {
28889 const_ix86_first_cycle_multipass_data_t data
28890 = (const_ix86_first_cycle_multipass_data_t) _data;
28891 ix86_first_cycle_multipass_data_t next_data
28892 = ix86_first_cycle_multipass_data;
28893
28894 if (data != NULL)
28895 {
28896 next_data->ifetch_block_len = data->ifetch_block_len;
28897 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
28898 }
28899 }
28900
28901 /* Deallocate target data. */
28902 static void
28903 core2i7_first_cycle_multipass_fini (void *_data)
28904 {
28905 ix86_first_cycle_multipass_data_t data
28906 = (ix86_first_cycle_multipass_data_t) _data;
28907
28908 if (data->ready_try_change)
28909 {
28910 sbitmap_free (data->ready_try_change);
28911 data->ready_try_change = NULL;
28912 data->ready_try_change_size = 0;
28913 }
28914 }
28915
28916 /* Prepare for scheduling pass. */
28917 static void
28918 ix86_sched_init_global (FILE *, int, int)
28919 {
28920 /* Install scheduling hooks for the current CPU. Some of these hooks are used
28921 in time-critical parts of the scheduler, so we only set them up when
28922 they are actually used. */
28923 switch (ix86_tune)
28924 {
28925 case PROCESSOR_CORE2:
28926 case PROCESSOR_NEHALEM:
28927 case PROCESSOR_SANDYBRIDGE:
28928 case PROCESSOR_HASWELL:
28929 /* Do not perform multipass scheduling for pre-reload schedule
28930 to save compile time. */
28931 if (reload_completed)
28932 {
28933 targetm.sched.dfa_post_advance_cycle
28934 = core2i7_dfa_post_advance_cycle;
28935 targetm.sched.first_cycle_multipass_init
28936 = core2i7_first_cycle_multipass_init;
28937 targetm.sched.first_cycle_multipass_begin
28938 = core2i7_first_cycle_multipass_begin;
28939 targetm.sched.first_cycle_multipass_issue
28940 = core2i7_first_cycle_multipass_issue;
28941 targetm.sched.first_cycle_multipass_backtrack
28942 = core2i7_first_cycle_multipass_backtrack;
28943 targetm.sched.first_cycle_multipass_end
28944 = core2i7_first_cycle_multipass_end;
28945 targetm.sched.first_cycle_multipass_fini
28946 = core2i7_first_cycle_multipass_fini;
28947
28948 /* Set decoder parameters. */
28949 core2i7_secondary_decoder_max_insn_size = 8;
28950 core2i7_ifetch_block_size = 16;
28951 core2i7_ifetch_block_max_insns = 6;
28952 break;
28953 }
28954 /* ... Fall through ... */
28955 default:
28956 targetm.sched.dfa_post_advance_cycle = NULL;
28957 targetm.sched.first_cycle_multipass_init = NULL;
28958 targetm.sched.first_cycle_multipass_begin = NULL;
28959 targetm.sched.first_cycle_multipass_issue = NULL;
28960 targetm.sched.first_cycle_multipass_backtrack = NULL;
28961 targetm.sched.first_cycle_multipass_end = NULL;
28962 targetm.sched.first_cycle_multipass_fini = NULL;
28963 break;
28964 }
28965 }
28966
28967 \f
28968 /* Compute the alignment given to a constant that is being placed in memory.
28969 EXP is the constant and ALIGN is the alignment that the object would
28970 ordinarily have.
28971 The value of this function is used instead of that alignment to align
28972 the object. */
28973
28974 int
28975 ix86_constant_alignment (tree exp, int align)
28976 {
28977 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
28978 || TREE_CODE (exp) == INTEGER_CST)
28979 {
28980 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
28981 return 64;
28982 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
28983 return 128;
28984 }
28985 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
28986 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
28987 return BITS_PER_WORD;
28988
28989 return align;
28990 }
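
/* Example of the effect above (illustrative only): a DFmode constant whose
   incoming alignment is below 64 bits is raised to 64 bits, a constant whose
   mode satisfies ALIGN_MODE_128 is raised to 128 bits, and a string constant
   of 31 bytes or more is raised to BITS_PER_WORD unless optimizing for
   size.  */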
28991
28992 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
28993 the data type, and ALIGN is the alignment that the object would
28994 ordinarily have. */
28995
28996 static int
28997 iamcu_alignment (tree type, int align)
28998 {
28999 enum machine_mode mode;
29000
29001 if (align < 32 || TYPE_USER_ALIGN (type))
29002 return align;
29003
29004 /* The Intel MCU psABI specifies that scalar types > 4 bytes are aligned to 4
29005 bytes. */
29006 mode = TYPE_MODE (strip_array_types (type));
29007 switch (GET_MODE_CLASS (mode))
29008 {
29009 case MODE_INT:
29010 case MODE_COMPLEX_INT:
29011 case MODE_COMPLEX_FLOAT:
29012 case MODE_FLOAT:
29013 case MODE_DECIMAL_FLOAT:
29014 return 32;
29015 default:
29016 return align;
29017 }
29018 }
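
/* Example (illustrative only): under -miamcu a 'double' or 'long long'
   object whose natural alignment is 64 bits falls into one of the mode
   classes above, so its alignment is capped at 32 bits, matching the psABI
   rule quoted above; user-specified alignment is left untouched by the
   early TYPE_USER_ALIGN check.  */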
29019
29020 /* Compute the alignment for a static variable.
29021 TYPE is the data type, and ALIGN is the alignment that
29022 the object would ordinarily have. The value of this function is used
29023 instead of that alignment to align the object. */
29024
29025 int
29026 ix86_data_alignment (tree type, int align, bool opt)
29027 {
29028 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
29029 for symbols from other compilation units or symbols that don't need
29030 to bind locally. In order to preserve some ABI compatibility with
29031 those compilers, ensure we don't decrease alignment from what we
29032 used to assume. */
29033
29034 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
29035
29036 /* A data structure equal to or greater than the size of a cache line
29037 (64 bytes in the Pentium 4 and other recent Intel processors, including
29038 processors based on Intel Core microarchitecture) should be aligned
29039 so that its base address is a multiple of the cache line size. */
29040
29041 int max_align
29042 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29043
29044 if (max_align < BITS_PER_WORD)
29045 max_align = BITS_PER_WORD;
29046
29047 switch (ix86_align_data_type)
29048 {
29049 case ix86_align_data_type_abi: opt = false; break;
29050 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29051 case ix86_align_data_type_cacheline: break;
29052 }
29053
29054 if (TARGET_IAMCU)
29055 align = iamcu_alignment (type, align);
29056
29057 if (opt
29058 && AGGREGATE_TYPE_P (type)
29059 && TYPE_SIZE (type)
29060 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
29061 {
29062 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
29063 && align < max_align_compat)
29064 align = max_align_compat;
29065 if (wi::geu_p (TYPE_SIZE (type), max_align)
29066 && align < max_align)
29067 align = max_align;
29068 }
29069
29070 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
29071 to a 16-byte boundary. */
29072 if (TARGET_64BIT)
29073 {
29074 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
29075 && TYPE_SIZE (type)
29076 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29077 && wi::geu_p (TYPE_SIZE (type), 128)
29078 && align < 128)
29079 return 128;
29080 }
29081
29082 if (!opt)
29083 return align;
29084
29085 if (TREE_CODE (type) == ARRAY_TYPE)
29086 {
29087 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29088 return 64;
29089 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29090 return 128;
29091 }
29092 else if (TREE_CODE (type) == COMPLEX_TYPE)
29093 {
29094
29095 if (TYPE_MODE (type) == DCmode && align < 64)
29096 return 64;
29097 if ((TYPE_MODE (type) == XCmode
29098 || TYPE_MODE (type) == TCmode) && align < 128)
29099 return 128;
29100 }
29101 else if ((TREE_CODE (type) == RECORD_TYPE
29102 || TREE_CODE (type) == UNION_TYPE
29103 || TREE_CODE (type) == QUAL_UNION_TYPE)
29104 && TYPE_FIELDS (type))
29105 {
29106 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29107 return 64;
29108 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29109 return 128;
29110 }
29111 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29112 || TREE_CODE (type) == INTEGER_TYPE)
29113 {
29114 if (TYPE_MODE (type) == DFmode && align < 64)
29115 return 64;
29116 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29117 return 128;
29118 }
29119
29120 return align;
29121 }
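
/* Worked example (sizes assumed for illustration): with a 64-byte prefetch
   block, max_align above is 512 bits, so a 100-byte global aggregate that
   would otherwise be 32-bit aligned is raised to 256 bits by the GCC 4.8
   compatibility clamp and then to 512 bits by the cache-line rule, while a
   24-byte array on x86-64 is only raised to the ABI-mandated 128 bits.  */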
29122
29123 /* Compute the alignment for a local variable or a stack slot. EXP is
29124 the data type or decl itself, MODE is the widest mode available and
29125 ALIGN is the alignment that the object would ordinarily have. The
29126 value of this macro is used instead of that alignment to align the
29127 object. */
29128
29129 unsigned int
29130 ix86_local_alignment (tree exp, machine_mode mode,
29131 unsigned int align)
29132 {
29133 tree type, decl;
29134
29135 if (exp && DECL_P (exp))
29136 {
29137 type = TREE_TYPE (exp);
29138 decl = exp;
29139 }
29140 else
29141 {
29142 type = exp;
29143 decl = NULL;
29144 }
29145
29146 /* Don't do dynamic stack realignment for long long objects with
29147 -mpreferred-stack-boundary=2. */
29148 if (!TARGET_64BIT
29149 && align == 64
29150 && ix86_preferred_stack_boundary < 64
29151 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
29152 && (!type || !TYPE_USER_ALIGN (type))
29153 && (!decl || !DECL_USER_ALIGN (decl)))
29154 align = 32;
29155
29156 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
29157 register in MODE. We will return the largest alignment of XF
29158 and DF. */
29159 if (!type)
29160 {
29161 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
29162 align = GET_MODE_ALIGNMENT (DFmode);
29163 return align;
29164 }
29165
29166 /* Don't increase alignment for Intel MCU psABI. */
29167 if (TARGET_IAMCU)
29168 return align;
29169
29170 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
29171 to a 16-byte boundary. The exact wording is:
29172
29173 An array uses the same alignment as its elements, except that a local or
29174 global array variable of length at least 16 bytes or
29175 a C99 variable-length array variable always has alignment of at least 16 bytes.
29176
29177 This was added to allow the use of aligned SSE instructions on arrays. The
29178 rule is meant for static storage (where the compiler cannot do the analysis
29179 by itself). We follow it for automatic variables only when convenient.
29180 We fully control everything in the compiled function, and functions from
29181 other units cannot rely on the alignment.
29182
29183 Exclude the va_list type. It is the common case of a local array where
29184 we cannot benefit from the alignment.
29185
29186 TODO: Probably one should optimize for size only when the variable does not escape. */
29187 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
29188 && TARGET_SSE)
29189 {
29190 if (AGGREGATE_TYPE_P (type)
29191 && (va_list_type_node == NULL_TREE
29192 || (TYPE_MAIN_VARIANT (type)
29193 != TYPE_MAIN_VARIANT (va_list_type_node)))
29194 && TYPE_SIZE (type)
29195 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29196 && wi::geu_p (TYPE_SIZE (type), 16)
29197 && align < 128)
29198 return 128;
29199 }
29200 if (TREE_CODE (type) == ARRAY_TYPE)
29201 {
29202 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29203 return 64;
29204 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29205 return 128;
29206 }
29207 else if (TREE_CODE (type) == COMPLEX_TYPE)
29208 {
29209 if (TYPE_MODE (type) == DCmode && align < 64)
29210 return 64;
29211 if ((TYPE_MODE (type) == XCmode
29212 || TYPE_MODE (type) == TCmode) && align < 128)
29213 return 128;
29214 }
29215 else if ((TREE_CODE (type) == RECORD_TYPE
29216 || TREE_CODE (type) == UNION_TYPE
29217 || TREE_CODE (type) == QUAL_UNION_TYPE)
29218 && TYPE_FIELDS (type))
29219 {
29220 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29221 return 64;
29222 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29223 return 128;
29224 }
29225 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29226 || TREE_CODE (type) == INTEGER_TYPE)
29227 {
29228
29229 if (TYPE_MODE (type) == DFmode && align < 64)
29230 return 64;
29231 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29232 return 128;
29233 }
29234 return align;
29235 }
29236
29237 /* Compute the minimum required alignment for dynamic stack realignment
29238 purposes for a local variable, parameter or a stack slot. EXP is
29239 the data type or decl itself, MODE is its mode and ALIGN is the
29240 alignment that the object would ordinarily have. */
29241
29242 unsigned int
29243 ix86_minimum_alignment (tree exp, machine_mode mode,
29244 unsigned int align)
29245 {
29246 tree type, decl;
29247
29248 if (exp && DECL_P (exp))
29249 {
29250 type = TREE_TYPE (exp);
29251 decl = exp;
29252 }
29253 else
29254 {
29255 type = exp;
29256 decl = NULL;
29257 }
29258
29259 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29260 return align;
29261
29262 /* Don't do dynamic stack realignment for long long objects with
29263 -mpreferred-stack-boundary=2. */
29264 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29265 && (!type || !TYPE_USER_ALIGN (type))
29266 && (!decl || !DECL_USER_ALIGN (decl)))
29267 return 32;
29268
29269 return align;
29270 }
29271 \f
29272 /* Find a location for the static chain incoming to a nested function.
29273 This is a register, unless all free registers are used by arguments. */
29274
29275 static rtx
29276 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29277 {
29278 unsigned regno;
29279
29280 /* While this function won't be called by the middle-end when a static
29281 chain isn't needed, it's also used throughout the backend so it's
29282 easiest to keep this check centralized. */
29283 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
29284 return NULL;
29285
29286 if (TARGET_64BIT)
29287 {
29288 /* We always use R10 in 64-bit mode. */
29289 regno = R10_REG;
29290 }
29291 else
29292 {
29293 const_tree fntype, fndecl;
29294 unsigned int ccvt;
29295
29296 /* By default in 32-bit mode we use ECX to pass the static chain. */
29297 regno = CX_REG;
29298
29299 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29300 {
29301 fntype = TREE_TYPE (fndecl_or_type);
29302 fndecl = fndecl_or_type;
29303 }
29304 else
29305 {
29306 fntype = fndecl_or_type;
29307 fndecl = NULL;
29308 }
29309
29310 ccvt = ix86_get_callcvt (fntype);
29311 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29312 {
29313 /* Fastcall functions use ecx/edx for arguments, which leaves
29314 us with EAX for the static chain.
29315 Thiscall functions use ecx for arguments, which also
29316 leaves us with EAX for the static chain. */
29317 regno = AX_REG;
29318 }
29319 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29320 {
29321 /* Thiscall functions use ecx for arguments, which leaves
29322 us with EAX and EDX for the static chain.
29323 We use EAX for ABI compatibility. */
29324 regno = AX_REG;
29325 }
29326 else if (ix86_function_regparm (fntype, fndecl) == 3)
29327 {
29328 /* For regparm 3, we have no free call-clobbered registers in
29329 which to store the static chain. In order to implement this,
29330 we have the trampoline push the static chain to the stack.
29331 However, we can't push a value below the return address when
29332 we call the nested function directly, so we have to use an
29333 alternate entry point. For this we use ESI, and have the
29334 alternate entry point push ESI, so that things appear the
29335 same once we're executing the nested function. */
29336 if (incoming_p)
29337 {
29338 if (fndecl == current_function_decl)
29339 ix86_static_chain_on_stack = true;
29340 return gen_frame_mem (SImode,
29341 plus_constant (Pmode,
29342 arg_pointer_rtx, -8));
29343 }
29344 regno = SI_REG;
29345 }
29346 }
29347
29348 return gen_rtx_REG (Pmode, regno);
29349 }
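
/* Summary of the cases above (for illustration): 64-bit code always uses
   %r10; 32-bit code normally uses %ecx; fastcall and thiscall functions use
   %eax because %ecx (and %edx) already carry arguments; and for regparm(3)
   functions the incoming chain lives on the stack, with %esi used on the
   outgoing side and pushed by the alternate entry point.  */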
29350
29351 /* Emit RTL insns to initialize the variable parts of a trampoline.
29352 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29353 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29354 to be passed to the target function. */
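
/* A sketch of the code sequences emitted below (byte encodings as written
   by this function; the 64-bit case assumes ptr_mode == DImode and the
   32-bit case assumes a register static chain):

     64-bit:  49 bb <imm64>   movabs $FNADDR, %r11
              49 ba <imm64>   movabs $CHAIN,  %r10
              49 ff e3        jmp    *%r11
              90              nop (write padding)

     32-bit:  b9 <imm32>      movl   $CHAIN, %ecx   (or b8 for %eax,
              e9 <rel32>      jmp    FNADDR          or 68 for pushl)  */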
29355
29356 static void
29357 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29358 {
29359 rtx mem, fnaddr;
29360 int opcode;
29361 int offset = 0;
29362
29363 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29364
29365 if (TARGET_64BIT)
29366 {
29367 int size;
29368
29369 /* Load the function address into r11. Try to load the address using
29370 the shorter movl instead of movabs. We may want to support
29371 movq for kernel mode, but the kernel does not use trampolines at
29372 the moment. FNADDR is a 32-bit address and may not be in
29373 DImode when ptr_mode == SImode. Always use movl in this
29374 case. */
29375 if (ptr_mode == SImode
29376 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29377 {
29378 fnaddr = copy_addr_to_reg (fnaddr);
29379
29380 mem = adjust_address (m_tramp, HImode, offset);
29381 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29382
29383 mem = adjust_address (m_tramp, SImode, offset + 2);
29384 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
29385 offset += 6;
29386 }
29387 else
29388 {
29389 mem = adjust_address (m_tramp, HImode, offset);
29390 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29391
29392 mem = adjust_address (m_tramp, DImode, offset + 2);
29393 emit_move_insn (mem, fnaddr);
29394 offset += 10;
29395 }
29396
29397 /* Load static chain using movabs to r10. Use the shorter movl
29398 instead of movabs when ptr_mode == SImode. */
29399 if (ptr_mode == SImode)
29400 {
29401 opcode = 0xba41;
29402 size = 6;
29403 }
29404 else
29405 {
29406 opcode = 0xba49;
29407 size = 10;
29408 }
29409
29410 mem = adjust_address (m_tramp, HImode, offset);
29411 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29412
29413 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29414 emit_move_insn (mem, chain_value);
29415 offset += size;
29416
29417 /* Jump to r11; the last (unused) byte is a nop, only there to
29418 pad the write out to a single 32-bit store. */
29419 mem = adjust_address (m_tramp, SImode, offset);
29420 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
29421 offset += 4;
29422 }
29423 else
29424 {
29425 rtx disp, chain;
29426
29427 /* Depending on the static chain location, either load a register
29428 with a constant, or push the constant to the stack. All of the
29429 instructions are the same size. */
29430 chain = ix86_static_chain (fndecl, true);
29431 if (REG_P (chain))
29432 {
29433 switch (REGNO (chain))
29434 {
29435 case AX_REG:
29436 opcode = 0xb8; break;
29437 case CX_REG:
29438 opcode = 0xb9; break;
29439 default:
29440 gcc_unreachable ();
29441 }
29442 }
29443 else
29444 opcode = 0x68;
29445
29446 mem = adjust_address (m_tramp, QImode, offset);
29447 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29448
29449 mem = adjust_address (m_tramp, SImode, offset + 1);
29450 emit_move_insn (mem, chain_value);
29451 offset += 5;
29452
29453 mem = adjust_address (m_tramp, QImode, offset);
29454 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29455
29456 mem = adjust_address (m_tramp, SImode, offset + 1);
29457
29458 /* Compute the offset from the end of the jmp to the target function.
29459 In the case where the trampoline stores the static chain on
29460 the stack, we need to skip the first insn which pushes the
29461 (call-saved) register static chain; this push is 1 byte. */
29462 offset += 5;
29463 disp = expand_binop (SImode, sub_optab, fnaddr,
29464 plus_constant (Pmode, XEXP (m_tramp, 0),
29465 offset - (MEM_P (chain) ? 1 : 0)),
29466 NULL_RTX, 1, OPTAB_DIRECT);
29467 emit_move_insn (mem, disp);
29468 }
29469
29470 gcc_assert (offset <= TRAMPOLINE_SIZE);
29471
29472 #ifdef HAVE_ENABLE_EXECUTE_STACK
29473 #ifdef CHECK_EXECUTE_STACK_ENABLED
29474 if (CHECK_EXECUTE_STACK_ENABLED)
29475 #endif
29476 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29477 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
29478 #endif
29479 }
29480 \f
29481 /* The following file contains several enumerations and data structures
29482 built from the definitions in i386-builtin-types.def. */
29483
29484 #include "i386-builtin-types.inc"
29485
29486 /* Table for the ix86 builtin non-function types. */
29487 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29488
29489 /* Retrieve an element from the above table, building some of
29490 the types lazily. */
29491
29492 static tree
29493 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29494 {
29495 unsigned int index;
29496 tree type, itype;
29497
29498 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
29499
29500 type = ix86_builtin_type_tab[(int) tcode];
29501 if (type != NULL)
29502 return type;
29503
29504 gcc_assert (tcode > IX86_BT_LAST_PRIM);
29505 if (tcode <= IX86_BT_LAST_VECT)
29506 {
29507 machine_mode mode;
29508
29509 index = tcode - IX86_BT_LAST_PRIM - 1;
29510 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29511 mode = ix86_builtin_type_vect_mode[index];
29512
29513 type = build_vector_type_for_mode (itype, mode);
29514 }
29515 else
29516 {
29517 int quals;
29518
29519 index = tcode - IX86_BT_LAST_VECT - 1;
29520 if (tcode <= IX86_BT_LAST_PTR)
29521 quals = TYPE_UNQUALIFIED;
29522 else
29523 quals = TYPE_QUAL_CONST;
29524
29525 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29526 if (quals != TYPE_UNQUALIFIED)
29527 itype = build_qualified_type (itype, quals);
29528
29529 type = build_pointer_type (itype);
29530 }
29531
29532 ix86_builtin_type_tab[(int) tcode] = type;
29533 return type;
29534 }
29535
29536 /* Table for the ix86 builtin function types. */
29537 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29538
29539 /* Retrieve an element from the above table, building some of
29540 the types lazily. */
29541
29542 static tree
29543 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29544 {
29545 tree type;
29546
29547 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
29548
29549 type = ix86_builtin_func_type_tab[(int) tcode];
29550 if (type != NULL)
29551 return type;
29552
29553 if (tcode <= IX86_BT_LAST_FUNC)
29554 {
29555 unsigned start = ix86_builtin_func_start[(int) tcode];
29556 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29557 tree rtype, atype, args = void_list_node;
29558 unsigned i;
29559
29560 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
29561 for (i = after - 1; i > start; --i)
29562 {
29563 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29564 args = tree_cons (NULL, atype, args);
29565 }
29566
29567 type = build_function_type (rtype, args);
29568 }
29569 else
29570 {
29571 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29572 enum ix86_builtin_func_type icode;
29573
29574 icode = ix86_builtin_func_alias_base[index];
29575 type = ix86_get_builtin_func_type (icode);
29576 }
29577
29578 ix86_builtin_func_type_tab[(int) tcode] = type;
29579 return type;
29580 }
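
/* Illustrative note (hypothetical entry, for exposition only): a FUNC entry
   whose slice of ix86_builtin_func_args is { V4SF, V4SF, V4SF } is turned
   into the function type V4SF (*) (V4SF, V4SF) -- the first slot is the
   return type and the remaining slots, walked in reverse, become the
   argument list terminated by void_list_node.  */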
29581
29582
29583 /* Codes for all the SSE/MMX builtins. */
29584 enum ix86_builtins
29585 {
29586 IX86_BUILTIN_ADDPS,
29587 IX86_BUILTIN_ADDSS,
29588 IX86_BUILTIN_DIVPS,
29589 IX86_BUILTIN_DIVSS,
29590 IX86_BUILTIN_MULPS,
29591 IX86_BUILTIN_MULSS,
29592 IX86_BUILTIN_SUBPS,
29593 IX86_BUILTIN_SUBSS,
29594
29595 IX86_BUILTIN_CMPEQPS,
29596 IX86_BUILTIN_CMPLTPS,
29597 IX86_BUILTIN_CMPLEPS,
29598 IX86_BUILTIN_CMPGTPS,
29599 IX86_BUILTIN_CMPGEPS,
29600 IX86_BUILTIN_CMPNEQPS,
29601 IX86_BUILTIN_CMPNLTPS,
29602 IX86_BUILTIN_CMPNLEPS,
29603 IX86_BUILTIN_CMPNGTPS,
29604 IX86_BUILTIN_CMPNGEPS,
29605 IX86_BUILTIN_CMPORDPS,
29606 IX86_BUILTIN_CMPUNORDPS,
29607 IX86_BUILTIN_CMPEQSS,
29608 IX86_BUILTIN_CMPLTSS,
29609 IX86_BUILTIN_CMPLESS,
29610 IX86_BUILTIN_CMPNEQSS,
29611 IX86_BUILTIN_CMPNLTSS,
29612 IX86_BUILTIN_CMPNLESS,
29613 IX86_BUILTIN_CMPORDSS,
29614 IX86_BUILTIN_CMPUNORDSS,
29615
29616 IX86_BUILTIN_COMIEQSS,
29617 IX86_BUILTIN_COMILTSS,
29618 IX86_BUILTIN_COMILESS,
29619 IX86_BUILTIN_COMIGTSS,
29620 IX86_BUILTIN_COMIGESS,
29621 IX86_BUILTIN_COMINEQSS,
29622 IX86_BUILTIN_UCOMIEQSS,
29623 IX86_BUILTIN_UCOMILTSS,
29624 IX86_BUILTIN_UCOMILESS,
29625 IX86_BUILTIN_UCOMIGTSS,
29626 IX86_BUILTIN_UCOMIGESS,
29627 IX86_BUILTIN_UCOMINEQSS,
29628
29629 IX86_BUILTIN_CVTPI2PS,
29630 IX86_BUILTIN_CVTPS2PI,
29631 IX86_BUILTIN_CVTSI2SS,
29632 IX86_BUILTIN_CVTSI642SS,
29633 IX86_BUILTIN_CVTSS2SI,
29634 IX86_BUILTIN_CVTSS2SI64,
29635 IX86_BUILTIN_CVTTPS2PI,
29636 IX86_BUILTIN_CVTTSS2SI,
29637 IX86_BUILTIN_CVTTSS2SI64,
29638
29639 IX86_BUILTIN_MAXPS,
29640 IX86_BUILTIN_MAXSS,
29641 IX86_BUILTIN_MINPS,
29642 IX86_BUILTIN_MINSS,
29643
29644 IX86_BUILTIN_LOADUPS,
29645 IX86_BUILTIN_STOREUPS,
29646 IX86_BUILTIN_MOVSS,
29647
29648 IX86_BUILTIN_MOVHLPS,
29649 IX86_BUILTIN_MOVLHPS,
29650 IX86_BUILTIN_LOADHPS,
29651 IX86_BUILTIN_LOADLPS,
29652 IX86_BUILTIN_STOREHPS,
29653 IX86_BUILTIN_STORELPS,
29654
29655 IX86_BUILTIN_MASKMOVQ,
29656 IX86_BUILTIN_MOVMSKPS,
29657 IX86_BUILTIN_PMOVMSKB,
29658
29659 IX86_BUILTIN_MOVNTPS,
29660 IX86_BUILTIN_MOVNTQ,
29661
29662 IX86_BUILTIN_LOADDQU,
29663 IX86_BUILTIN_STOREDQU,
29664
29665 IX86_BUILTIN_PACKSSWB,
29666 IX86_BUILTIN_PACKSSDW,
29667 IX86_BUILTIN_PACKUSWB,
29668
29669 IX86_BUILTIN_PADDB,
29670 IX86_BUILTIN_PADDW,
29671 IX86_BUILTIN_PADDD,
29672 IX86_BUILTIN_PADDQ,
29673 IX86_BUILTIN_PADDSB,
29674 IX86_BUILTIN_PADDSW,
29675 IX86_BUILTIN_PADDUSB,
29676 IX86_BUILTIN_PADDUSW,
29677 IX86_BUILTIN_PSUBB,
29678 IX86_BUILTIN_PSUBW,
29679 IX86_BUILTIN_PSUBD,
29680 IX86_BUILTIN_PSUBQ,
29681 IX86_BUILTIN_PSUBSB,
29682 IX86_BUILTIN_PSUBSW,
29683 IX86_BUILTIN_PSUBUSB,
29684 IX86_BUILTIN_PSUBUSW,
29685
29686 IX86_BUILTIN_PAND,
29687 IX86_BUILTIN_PANDN,
29688 IX86_BUILTIN_POR,
29689 IX86_BUILTIN_PXOR,
29690
29691 IX86_BUILTIN_PAVGB,
29692 IX86_BUILTIN_PAVGW,
29693
29694 IX86_BUILTIN_PCMPEQB,
29695 IX86_BUILTIN_PCMPEQW,
29696 IX86_BUILTIN_PCMPEQD,
29697 IX86_BUILTIN_PCMPGTB,
29698 IX86_BUILTIN_PCMPGTW,
29699 IX86_BUILTIN_PCMPGTD,
29700
29701 IX86_BUILTIN_PMADDWD,
29702
29703 IX86_BUILTIN_PMAXSW,
29704 IX86_BUILTIN_PMAXUB,
29705 IX86_BUILTIN_PMINSW,
29706 IX86_BUILTIN_PMINUB,
29707
29708 IX86_BUILTIN_PMULHUW,
29709 IX86_BUILTIN_PMULHW,
29710 IX86_BUILTIN_PMULLW,
29711
29712 IX86_BUILTIN_PSADBW,
29713 IX86_BUILTIN_PSHUFW,
29714
29715 IX86_BUILTIN_PSLLW,
29716 IX86_BUILTIN_PSLLD,
29717 IX86_BUILTIN_PSLLQ,
29718 IX86_BUILTIN_PSRAW,
29719 IX86_BUILTIN_PSRAD,
29720 IX86_BUILTIN_PSRLW,
29721 IX86_BUILTIN_PSRLD,
29722 IX86_BUILTIN_PSRLQ,
29723 IX86_BUILTIN_PSLLWI,
29724 IX86_BUILTIN_PSLLDI,
29725 IX86_BUILTIN_PSLLQI,
29726 IX86_BUILTIN_PSRAWI,
29727 IX86_BUILTIN_PSRADI,
29728 IX86_BUILTIN_PSRLWI,
29729 IX86_BUILTIN_PSRLDI,
29730 IX86_BUILTIN_PSRLQI,
29731
29732 IX86_BUILTIN_PUNPCKHBW,
29733 IX86_BUILTIN_PUNPCKHWD,
29734 IX86_BUILTIN_PUNPCKHDQ,
29735 IX86_BUILTIN_PUNPCKLBW,
29736 IX86_BUILTIN_PUNPCKLWD,
29737 IX86_BUILTIN_PUNPCKLDQ,
29738
29739 IX86_BUILTIN_SHUFPS,
29740
29741 IX86_BUILTIN_RCPPS,
29742 IX86_BUILTIN_RCPSS,
29743 IX86_BUILTIN_RSQRTPS,
29744 IX86_BUILTIN_RSQRTPS_NR,
29745 IX86_BUILTIN_RSQRTSS,
29746 IX86_BUILTIN_RSQRTF,
29747 IX86_BUILTIN_SQRTPS,
29748 IX86_BUILTIN_SQRTPS_NR,
29749 IX86_BUILTIN_SQRTSS,
29750
29751 IX86_BUILTIN_UNPCKHPS,
29752 IX86_BUILTIN_UNPCKLPS,
29753
29754 IX86_BUILTIN_ANDPS,
29755 IX86_BUILTIN_ANDNPS,
29756 IX86_BUILTIN_ORPS,
29757 IX86_BUILTIN_XORPS,
29758
29759 IX86_BUILTIN_EMMS,
29760 IX86_BUILTIN_LDMXCSR,
29761 IX86_BUILTIN_STMXCSR,
29762 IX86_BUILTIN_SFENCE,
29763
29764 IX86_BUILTIN_FXSAVE,
29765 IX86_BUILTIN_FXRSTOR,
29766 IX86_BUILTIN_FXSAVE64,
29767 IX86_BUILTIN_FXRSTOR64,
29768
29769 IX86_BUILTIN_XSAVE,
29770 IX86_BUILTIN_XRSTOR,
29771 IX86_BUILTIN_XSAVE64,
29772 IX86_BUILTIN_XRSTOR64,
29773
29774 IX86_BUILTIN_XSAVEOPT,
29775 IX86_BUILTIN_XSAVEOPT64,
29776
29777 IX86_BUILTIN_XSAVEC,
29778 IX86_BUILTIN_XSAVEC64,
29779
29780 IX86_BUILTIN_XSAVES,
29781 IX86_BUILTIN_XRSTORS,
29782 IX86_BUILTIN_XSAVES64,
29783 IX86_BUILTIN_XRSTORS64,
29784
29785 /* 3DNow! Original */
29786 IX86_BUILTIN_FEMMS,
29787 IX86_BUILTIN_PAVGUSB,
29788 IX86_BUILTIN_PF2ID,
29789 IX86_BUILTIN_PFACC,
29790 IX86_BUILTIN_PFADD,
29791 IX86_BUILTIN_PFCMPEQ,
29792 IX86_BUILTIN_PFCMPGE,
29793 IX86_BUILTIN_PFCMPGT,
29794 IX86_BUILTIN_PFMAX,
29795 IX86_BUILTIN_PFMIN,
29796 IX86_BUILTIN_PFMUL,
29797 IX86_BUILTIN_PFRCP,
29798 IX86_BUILTIN_PFRCPIT1,
29799 IX86_BUILTIN_PFRCPIT2,
29800 IX86_BUILTIN_PFRSQIT1,
29801 IX86_BUILTIN_PFRSQRT,
29802 IX86_BUILTIN_PFSUB,
29803 IX86_BUILTIN_PFSUBR,
29804 IX86_BUILTIN_PI2FD,
29805 IX86_BUILTIN_PMULHRW,
29806
29807 /* 3DNow! Athlon Extensions */
29808 IX86_BUILTIN_PF2IW,
29809 IX86_BUILTIN_PFNACC,
29810 IX86_BUILTIN_PFPNACC,
29811 IX86_BUILTIN_PI2FW,
29812 IX86_BUILTIN_PSWAPDSI,
29813 IX86_BUILTIN_PSWAPDSF,
29814
29815 /* SSE2 */
29816 IX86_BUILTIN_ADDPD,
29817 IX86_BUILTIN_ADDSD,
29818 IX86_BUILTIN_DIVPD,
29819 IX86_BUILTIN_DIVSD,
29820 IX86_BUILTIN_MULPD,
29821 IX86_BUILTIN_MULSD,
29822 IX86_BUILTIN_SUBPD,
29823 IX86_BUILTIN_SUBSD,
29824
29825 IX86_BUILTIN_CMPEQPD,
29826 IX86_BUILTIN_CMPLTPD,
29827 IX86_BUILTIN_CMPLEPD,
29828 IX86_BUILTIN_CMPGTPD,
29829 IX86_BUILTIN_CMPGEPD,
29830 IX86_BUILTIN_CMPNEQPD,
29831 IX86_BUILTIN_CMPNLTPD,
29832 IX86_BUILTIN_CMPNLEPD,
29833 IX86_BUILTIN_CMPNGTPD,
29834 IX86_BUILTIN_CMPNGEPD,
29835 IX86_BUILTIN_CMPORDPD,
29836 IX86_BUILTIN_CMPUNORDPD,
29837 IX86_BUILTIN_CMPEQSD,
29838 IX86_BUILTIN_CMPLTSD,
29839 IX86_BUILTIN_CMPLESD,
29840 IX86_BUILTIN_CMPNEQSD,
29841 IX86_BUILTIN_CMPNLTSD,
29842 IX86_BUILTIN_CMPNLESD,
29843 IX86_BUILTIN_CMPORDSD,
29844 IX86_BUILTIN_CMPUNORDSD,
29845
29846 IX86_BUILTIN_COMIEQSD,
29847 IX86_BUILTIN_COMILTSD,
29848 IX86_BUILTIN_COMILESD,
29849 IX86_BUILTIN_COMIGTSD,
29850 IX86_BUILTIN_COMIGESD,
29851 IX86_BUILTIN_COMINEQSD,
29852 IX86_BUILTIN_UCOMIEQSD,
29853 IX86_BUILTIN_UCOMILTSD,
29854 IX86_BUILTIN_UCOMILESD,
29855 IX86_BUILTIN_UCOMIGTSD,
29856 IX86_BUILTIN_UCOMIGESD,
29857 IX86_BUILTIN_UCOMINEQSD,
29858
29859 IX86_BUILTIN_MAXPD,
29860 IX86_BUILTIN_MAXSD,
29861 IX86_BUILTIN_MINPD,
29862 IX86_BUILTIN_MINSD,
29863
29864 IX86_BUILTIN_ANDPD,
29865 IX86_BUILTIN_ANDNPD,
29866 IX86_BUILTIN_ORPD,
29867 IX86_BUILTIN_XORPD,
29868
29869 IX86_BUILTIN_SQRTPD,
29870 IX86_BUILTIN_SQRTSD,
29871
29872 IX86_BUILTIN_UNPCKHPD,
29873 IX86_BUILTIN_UNPCKLPD,
29874
29875 IX86_BUILTIN_SHUFPD,
29876
29877 IX86_BUILTIN_LOADUPD,
29878 IX86_BUILTIN_STOREUPD,
29879 IX86_BUILTIN_MOVSD,
29880
29881 IX86_BUILTIN_LOADHPD,
29882 IX86_BUILTIN_LOADLPD,
29883
29884 IX86_BUILTIN_CVTDQ2PD,
29885 IX86_BUILTIN_CVTDQ2PS,
29886
29887 IX86_BUILTIN_CVTPD2DQ,
29888 IX86_BUILTIN_CVTPD2PI,
29889 IX86_BUILTIN_CVTPD2PS,
29890 IX86_BUILTIN_CVTTPD2DQ,
29891 IX86_BUILTIN_CVTTPD2PI,
29892
29893 IX86_BUILTIN_CVTPI2PD,
29894 IX86_BUILTIN_CVTSI2SD,
29895 IX86_BUILTIN_CVTSI642SD,
29896
29897 IX86_BUILTIN_CVTSD2SI,
29898 IX86_BUILTIN_CVTSD2SI64,
29899 IX86_BUILTIN_CVTSD2SS,
29900 IX86_BUILTIN_CVTSS2SD,
29901 IX86_BUILTIN_CVTTSD2SI,
29902 IX86_BUILTIN_CVTTSD2SI64,
29903
29904 IX86_BUILTIN_CVTPS2DQ,
29905 IX86_BUILTIN_CVTPS2PD,
29906 IX86_BUILTIN_CVTTPS2DQ,
29907
29908 IX86_BUILTIN_MOVNTI,
29909 IX86_BUILTIN_MOVNTI64,
29910 IX86_BUILTIN_MOVNTPD,
29911 IX86_BUILTIN_MOVNTDQ,
29912
29913 IX86_BUILTIN_MOVQ128,
29914
29915 /* SSE2 MMX */
29916 IX86_BUILTIN_MASKMOVDQU,
29917 IX86_BUILTIN_MOVMSKPD,
29918 IX86_BUILTIN_PMOVMSKB128,
29919
29920 IX86_BUILTIN_PACKSSWB128,
29921 IX86_BUILTIN_PACKSSDW128,
29922 IX86_BUILTIN_PACKUSWB128,
29923
29924 IX86_BUILTIN_PADDB128,
29925 IX86_BUILTIN_PADDW128,
29926 IX86_BUILTIN_PADDD128,
29927 IX86_BUILTIN_PADDQ128,
29928 IX86_BUILTIN_PADDSB128,
29929 IX86_BUILTIN_PADDSW128,
29930 IX86_BUILTIN_PADDUSB128,
29931 IX86_BUILTIN_PADDUSW128,
29932 IX86_BUILTIN_PSUBB128,
29933 IX86_BUILTIN_PSUBW128,
29934 IX86_BUILTIN_PSUBD128,
29935 IX86_BUILTIN_PSUBQ128,
29936 IX86_BUILTIN_PSUBSB128,
29937 IX86_BUILTIN_PSUBSW128,
29938 IX86_BUILTIN_PSUBUSB128,
29939 IX86_BUILTIN_PSUBUSW128,
29940
29941 IX86_BUILTIN_PAND128,
29942 IX86_BUILTIN_PANDN128,
29943 IX86_BUILTIN_POR128,
29944 IX86_BUILTIN_PXOR128,
29945
29946 IX86_BUILTIN_PAVGB128,
29947 IX86_BUILTIN_PAVGW128,
29948
29949 IX86_BUILTIN_PCMPEQB128,
29950 IX86_BUILTIN_PCMPEQW128,
29951 IX86_BUILTIN_PCMPEQD128,
29952 IX86_BUILTIN_PCMPGTB128,
29953 IX86_BUILTIN_PCMPGTW128,
29954 IX86_BUILTIN_PCMPGTD128,
29955
29956 IX86_BUILTIN_PMADDWD128,
29957
29958 IX86_BUILTIN_PMAXSW128,
29959 IX86_BUILTIN_PMAXUB128,
29960 IX86_BUILTIN_PMINSW128,
29961 IX86_BUILTIN_PMINUB128,
29962
29963 IX86_BUILTIN_PMULUDQ,
29964 IX86_BUILTIN_PMULUDQ128,
29965 IX86_BUILTIN_PMULHUW128,
29966 IX86_BUILTIN_PMULHW128,
29967 IX86_BUILTIN_PMULLW128,
29968
29969 IX86_BUILTIN_PSADBW128,
29970 IX86_BUILTIN_PSHUFHW,
29971 IX86_BUILTIN_PSHUFLW,
29972 IX86_BUILTIN_PSHUFD,
29973
29974 IX86_BUILTIN_PSLLDQI128,
29975 IX86_BUILTIN_PSLLWI128,
29976 IX86_BUILTIN_PSLLDI128,
29977 IX86_BUILTIN_PSLLQI128,
29978 IX86_BUILTIN_PSRAWI128,
29979 IX86_BUILTIN_PSRADI128,
29980 IX86_BUILTIN_PSRLDQI128,
29981 IX86_BUILTIN_PSRLWI128,
29982 IX86_BUILTIN_PSRLDI128,
29983 IX86_BUILTIN_PSRLQI128,
29984
29985 IX86_BUILTIN_PSLLDQ128,
29986 IX86_BUILTIN_PSLLW128,
29987 IX86_BUILTIN_PSLLD128,
29988 IX86_BUILTIN_PSLLQ128,
29989 IX86_BUILTIN_PSRAW128,
29990 IX86_BUILTIN_PSRAD128,
29991 IX86_BUILTIN_PSRLW128,
29992 IX86_BUILTIN_PSRLD128,
29993 IX86_BUILTIN_PSRLQ128,
29994
29995 IX86_BUILTIN_PUNPCKHBW128,
29996 IX86_BUILTIN_PUNPCKHWD128,
29997 IX86_BUILTIN_PUNPCKHDQ128,
29998 IX86_BUILTIN_PUNPCKHQDQ128,
29999 IX86_BUILTIN_PUNPCKLBW128,
30000 IX86_BUILTIN_PUNPCKLWD128,
30001 IX86_BUILTIN_PUNPCKLDQ128,
30002 IX86_BUILTIN_PUNPCKLQDQ128,
30003
30004 IX86_BUILTIN_CLFLUSH,
30005 IX86_BUILTIN_MFENCE,
30006 IX86_BUILTIN_LFENCE,
30007 IX86_BUILTIN_PAUSE,
30008
30009 IX86_BUILTIN_FNSTENV,
30010 IX86_BUILTIN_FLDENV,
30011 IX86_BUILTIN_FNSTSW,
30012 IX86_BUILTIN_FNCLEX,
30013
30014 IX86_BUILTIN_BSRSI,
30015 IX86_BUILTIN_BSRDI,
30016 IX86_BUILTIN_RDPMC,
30017 IX86_BUILTIN_RDTSC,
30018 IX86_BUILTIN_RDTSCP,
30019 IX86_BUILTIN_ROLQI,
30020 IX86_BUILTIN_ROLHI,
30021 IX86_BUILTIN_RORQI,
30022 IX86_BUILTIN_RORHI,
30023
30024 /* SSE3. */
30025 IX86_BUILTIN_ADDSUBPS,
30026 IX86_BUILTIN_HADDPS,
30027 IX86_BUILTIN_HSUBPS,
30028 IX86_BUILTIN_MOVSHDUP,
30029 IX86_BUILTIN_MOVSLDUP,
30030 IX86_BUILTIN_ADDSUBPD,
30031 IX86_BUILTIN_HADDPD,
30032 IX86_BUILTIN_HSUBPD,
30033 IX86_BUILTIN_LDDQU,
30034
30035 IX86_BUILTIN_MONITOR,
30036 IX86_BUILTIN_MWAIT,
30037 IX86_BUILTIN_CLZERO,
30038
30039 /* SSSE3. */
30040 IX86_BUILTIN_PHADDW,
30041 IX86_BUILTIN_PHADDD,
30042 IX86_BUILTIN_PHADDSW,
30043 IX86_BUILTIN_PHSUBW,
30044 IX86_BUILTIN_PHSUBD,
30045 IX86_BUILTIN_PHSUBSW,
30046 IX86_BUILTIN_PMADDUBSW,
30047 IX86_BUILTIN_PMULHRSW,
30048 IX86_BUILTIN_PSHUFB,
30049 IX86_BUILTIN_PSIGNB,
30050 IX86_BUILTIN_PSIGNW,
30051 IX86_BUILTIN_PSIGND,
30052 IX86_BUILTIN_PALIGNR,
30053 IX86_BUILTIN_PABSB,
30054 IX86_BUILTIN_PABSW,
30055 IX86_BUILTIN_PABSD,
30056
30057 IX86_BUILTIN_PHADDW128,
30058 IX86_BUILTIN_PHADDD128,
30059 IX86_BUILTIN_PHADDSW128,
30060 IX86_BUILTIN_PHSUBW128,
30061 IX86_BUILTIN_PHSUBD128,
30062 IX86_BUILTIN_PHSUBSW128,
30063 IX86_BUILTIN_PMADDUBSW128,
30064 IX86_BUILTIN_PMULHRSW128,
30065 IX86_BUILTIN_PSHUFB128,
30066 IX86_BUILTIN_PSIGNB128,
30067 IX86_BUILTIN_PSIGNW128,
30068 IX86_BUILTIN_PSIGND128,
30069 IX86_BUILTIN_PALIGNR128,
30070 IX86_BUILTIN_PABSB128,
30071 IX86_BUILTIN_PABSW128,
30072 IX86_BUILTIN_PABSD128,
30073
30074 /* AMDFAM10 - SSE4A New Instructions. */
30075 IX86_BUILTIN_MOVNTSD,
30076 IX86_BUILTIN_MOVNTSS,
30077 IX86_BUILTIN_EXTRQI,
30078 IX86_BUILTIN_EXTRQ,
30079 IX86_BUILTIN_INSERTQI,
30080 IX86_BUILTIN_INSERTQ,
30081
30082 /* SSE4.1. */
30083 IX86_BUILTIN_BLENDPD,
30084 IX86_BUILTIN_BLENDPS,
30085 IX86_BUILTIN_BLENDVPD,
30086 IX86_BUILTIN_BLENDVPS,
30087 IX86_BUILTIN_PBLENDVB128,
30088 IX86_BUILTIN_PBLENDW128,
30089
30090 IX86_BUILTIN_DPPD,
30091 IX86_BUILTIN_DPPS,
30092
30093 IX86_BUILTIN_INSERTPS128,
30094
30095 IX86_BUILTIN_MOVNTDQA,
30096 IX86_BUILTIN_MPSADBW128,
30097 IX86_BUILTIN_PACKUSDW128,
30098 IX86_BUILTIN_PCMPEQQ,
30099 IX86_BUILTIN_PHMINPOSUW128,
30100
30101 IX86_BUILTIN_PMAXSB128,
30102 IX86_BUILTIN_PMAXSD128,
30103 IX86_BUILTIN_PMAXUD128,
30104 IX86_BUILTIN_PMAXUW128,
30105
30106 IX86_BUILTIN_PMINSB128,
30107 IX86_BUILTIN_PMINSD128,
30108 IX86_BUILTIN_PMINUD128,
30109 IX86_BUILTIN_PMINUW128,
30110
30111 IX86_BUILTIN_PMOVSXBW128,
30112 IX86_BUILTIN_PMOVSXBD128,
30113 IX86_BUILTIN_PMOVSXBQ128,
30114 IX86_BUILTIN_PMOVSXWD128,
30115 IX86_BUILTIN_PMOVSXWQ128,
30116 IX86_BUILTIN_PMOVSXDQ128,
30117
30118 IX86_BUILTIN_PMOVZXBW128,
30119 IX86_BUILTIN_PMOVZXBD128,
30120 IX86_BUILTIN_PMOVZXBQ128,
30121 IX86_BUILTIN_PMOVZXWD128,
30122 IX86_BUILTIN_PMOVZXWQ128,
30123 IX86_BUILTIN_PMOVZXDQ128,
30124
30125 IX86_BUILTIN_PMULDQ128,
30126 IX86_BUILTIN_PMULLD128,
30127
30128 IX86_BUILTIN_ROUNDSD,
30129 IX86_BUILTIN_ROUNDSS,
30130
30131 IX86_BUILTIN_ROUNDPD,
30132 IX86_BUILTIN_ROUNDPS,
30133
30134 IX86_BUILTIN_FLOORPD,
30135 IX86_BUILTIN_CEILPD,
30136 IX86_BUILTIN_TRUNCPD,
30137 IX86_BUILTIN_RINTPD,
30138 IX86_BUILTIN_ROUNDPD_AZ,
30139
30140 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
30141 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
30142 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
30143
30144 IX86_BUILTIN_FLOORPS,
30145 IX86_BUILTIN_CEILPS,
30146 IX86_BUILTIN_TRUNCPS,
30147 IX86_BUILTIN_RINTPS,
30148 IX86_BUILTIN_ROUNDPS_AZ,
30149
30150 IX86_BUILTIN_FLOORPS_SFIX,
30151 IX86_BUILTIN_CEILPS_SFIX,
30152 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
30153
30154 IX86_BUILTIN_PTESTZ,
30155 IX86_BUILTIN_PTESTC,
30156 IX86_BUILTIN_PTESTNZC,
30157
30158 IX86_BUILTIN_VEC_INIT_V2SI,
30159 IX86_BUILTIN_VEC_INIT_V4HI,
30160 IX86_BUILTIN_VEC_INIT_V8QI,
30161 IX86_BUILTIN_VEC_EXT_V2DF,
30162 IX86_BUILTIN_VEC_EXT_V2DI,
30163 IX86_BUILTIN_VEC_EXT_V4SF,
30164 IX86_BUILTIN_VEC_EXT_V4SI,
30165 IX86_BUILTIN_VEC_EXT_V8HI,
30166 IX86_BUILTIN_VEC_EXT_V2SI,
30167 IX86_BUILTIN_VEC_EXT_V4HI,
30168 IX86_BUILTIN_VEC_EXT_V16QI,
30169 IX86_BUILTIN_VEC_SET_V2DI,
30170 IX86_BUILTIN_VEC_SET_V4SF,
30171 IX86_BUILTIN_VEC_SET_V4SI,
30172 IX86_BUILTIN_VEC_SET_V8HI,
30173 IX86_BUILTIN_VEC_SET_V4HI,
30174 IX86_BUILTIN_VEC_SET_V16QI,
30175
30176 IX86_BUILTIN_VEC_PACK_SFIX,
30177 IX86_BUILTIN_VEC_PACK_SFIX256,
30178
30179 /* SSE4.2. */
30180 IX86_BUILTIN_CRC32QI,
30181 IX86_BUILTIN_CRC32HI,
30182 IX86_BUILTIN_CRC32SI,
30183 IX86_BUILTIN_CRC32DI,
30184
30185 IX86_BUILTIN_PCMPESTRI128,
30186 IX86_BUILTIN_PCMPESTRM128,
30187 IX86_BUILTIN_PCMPESTRA128,
30188 IX86_BUILTIN_PCMPESTRC128,
30189 IX86_BUILTIN_PCMPESTRO128,
30190 IX86_BUILTIN_PCMPESTRS128,
30191 IX86_BUILTIN_PCMPESTRZ128,
30192 IX86_BUILTIN_PCMPISTRI128,
30193 IX86_BUILTIN_PCMPISTRM128,
30194 IX86_BUILTIN_PCMPISTRA128,
30195 IX86_BUILTIN_PCMPISTRC128,
30196 IX86_BUILTIN_PCMPISTRO128,
30197 IX86_BUILTIN_PCMPISTRS128,
30198 IX86_BUILTIN_PCMPISTRZ128,
30199
30200 IX86_BUILTIN_PCMPGTQ,
30201
30202 /* AES instructions */
30203 IX86_BUILTIN_AESENC128,
30204 IX86_BUILTIN_AESENCLAST128,
30205 IX86_BUILTIN_AESDEC128,
30206 IX86_BUILTIN_AESDECLAST128,
30207 IX86_BUILTIN_AESIMC128,
30208 IX86_BUILTIN_AESKEYGENASSIST128,
30209
30210 /* PCLMUL instruction */
30211 IX86_BUILTIN_PCLMULQDQ128,
30212
30213 /* AVX */
30214 IX86_BUILTIN_ADDPD256,
30215 IX86_BUILTIN_ADDPS256,
30216 IX86_BUILTIN_ADDSUBPD256,
30217 IX86_BUILTIN_ADDSUBPS256,
30218 IX86_BUILTIN_ANDPD256,
30219 IX86_BUILTIN_ANDPS256,
30220 IX86_BUILTIN_ANDNPD256,
30221 IX86_BUILTIN_ANDNPS256,
30222 IX86_BUILTIN_BLENDPD256,
30223 IX86_BUILTIN_BLENDPS256,
30224 IX86_BUILTIN_BLENDVPD256,
30225 IX86_BUILTIN_BLENDVPS256,
30226 IX86_BUILTIN_DIVPD256,
30227 IX86_BUILTIN_DIVPS256,
30228 IX86_BUILTIN_DPPS256,
30229 IX86_BUILTIN_HADDPD256,
30230 IX86_BUILTIN_HADDPS256,
30231 IX86_BUILTIN_HSUBPD256,
30232 IX86_BUILTIN_HSUBPS256,
30233 IX86_BUILTIN_MAXPD256,
30234 IX86_BUILTIN_MAXPS256,
30235 IX86_BUILTIN_MINPD256,
30236 IX86_BUILTIN_MINPS256,
30237 IX86_BUILTIN_MULPD256,
30238 IX86_BUILTIN_MULPS256,
30239 IX86_BUILTIN_ORPD256,
30240 IX86_BUILTIN_ORPS256,
30241 IX86_BUILTIN_SHUFPD256,
30242 IX86_BUILTIN_SHUFPS256,
30243 IX86_BUILTIN_SUBPD256,
30244 IX86_BUILTIN_SUBPS256,
30245 IX86_BUILTIN_XORPD256,
30246 IX86_BUILTIN_XORPS256,
30247 IX86_BUILTIN_CMPSD,
30248 IX86_BUILTIN_CMPSS,
30249 IX86_BUILTIN_CMPPD,
30250 IX86_BUILTIN_CMPPS,
30251 IX86_BUILTIN_CMPPD256,
30252 IX86_BUILTIN_CMPPS256,
30253 IX86_BUILTIN_CVTDQ2PD256,
30254 IX86_BUILTIN_CVTDQ2PS256,
30255 IX86_BUILTIN_CVTPD2PS256,
30256 IX86_BUILTIN_CVTPS2DQ256,
30257 IX86_BUILTIN_CVTPS2PD256,
30258 IX86_BUILTIN_CVTTPD2DQ256,
30259 IX86_BUILTIN_CVTPD2DQ256,
30260 IX86_BUILTIN_CVTTPS2DQ256,
30261 IX86_BUILTIN_EXTRACTF128PD256,
30262 IX86_BUILTIN_EXTRACTF128PS256,
30263 IX86_BUILTIN_EXTRACTF128SI256,
30264 IX86_BUILTIN_VZEROALL,
30265 IX86_BUILTIN_VZEROUPPER,
30266 IX86_BUILTIN_VPERMILVARPD,
30267 IX86_BUILTIN_VPERMILVARPS,
30268 IX86_BUILTIN_VPERMILVARPD256,
30269 IX86_BUILTIN_VPERMILVARPS256,
30270 IX86_BUILTIN_VPERMILPD,
30271 IX86_BUILTIN_VPERMILPS,
30272 IX86_BUILTIN_VPERMILPD256,
30273 IX86_BUILTIN_VPERMILPS256,
30274 IX86_BUILTIN_VPERMIL2PD,
30275 IX86_BUILTIN_VPERMIL2PS,
30276 IX86_BUILTIN_VPERMIL2PD256,
30277 IX86_BUILTIN_VPERMIL2PS256,
30278 IX86_BUILTIN_VPERM2F128PD256,
30279 IX86_BUILTIN_VPERM2F128PS256,
30280 IX86_BUILTIN_VPERM2F128SI256,
30281 IX86_BUILTIN_VBROADCASTSS,
30282 IX86_BUILTIN_VBROADCASTSD256,
30283 IX86_BUILTIN_VBROADCASTSS256,
30284 IX86_BUILTIN_VBROADCASTPD256,
30285 IX86_BUILTIN_VBROADCASTPS256,
30286 IX86_BUILTIN_VINSERTF128PD256,
30287 IX86_BUILTIN_VINSERTF128PS256,
30288 IX86_BUILTIN_VINSERTF128SI256,
30289 IX86_BUILTIN_LOADUPD256,
30290 IX86_BUILTIN_LOADUPS256,
30291 IX86_BUILTIN_STOREUPD256,
30292 IX86_BUILTIN_STOREUPS256,
30293 IX86_BUILTIN_LDDQU256,
30294 IX86_BUILTIN_MOVNTDQ256,
30295 IX86_BUILTIN_MOVNTPD256,
30296 IX86_BUILTIN_MOVNTPS256,
30297 IX86_BUILTIN_LOADDQU256,
30298 IX86_BUILTIN_STOREDQU256,
30299 IX86_BUILTIN_MASKLOADPD,
30300 IX86_BUILTIN_MASKLOADPS,
30301 IX86_BUILTIN_MASKSTOREPD,
30302 IX86_BUILTIN_MASKSTOREPS,
30303 IX86_BUILTIN_MASKLOADPD256,
30304 IX86_BUILTIN_MASKLOADPS256,
30305 IX86_BUILTIN_MASKSTOREPD256,
30306 IX86_BUILTIN_MASKSTOREPS256,
30307 IX86_BUILTIN_MOVSHDUP256,
30308 IX86_BUILTIN_MOVSLDUP256,
30309 IX86_BUILTIN_MOVDDUP256,
30310
30311 IX86_BUILTIN_SQRTPD256,
30312 IX86_BUILTIN_SQRTPS256,
30313 IX86_BUILTIN_SQRTPS_NR256,
30314 IX86_BUILTIN_RSQRTPS256,
30315 IX86_BUILTIN_RSQRTPS_NR256,
30316
30317 IX86_BUILTIN_RCPPS256,
30318
30319 IX86_BUILTIN_ROUNDPD256,
30320 IX86_BUILTIN_ROUNDPS256,
30321
30322 IX86_BUILTIN_FLOORPD256,
30323 IX86_BUILTIN_CEILPD256,
30324 IX86_BUILTIN_TRUNCPD256,
30325 IX86_BUILTIN_RINTPD256,
30326 IX86_BUILTIN_ROUNDPD_AZ256,
30327
30328 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
30329 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
30330 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
30331
30332 IX86_BUILTIN_FLOORPS256,
30333 IX86_BUILTIN_CEILPS256,
30334 IX86_BUILTIN_TRUNCPS256,
30335 IX86_BUILTIN_RINTPS256,
30336 IX86_BUILTIN_ROUNDPS_AZ256,
30337
30338 IX86_BUILTIN_FLOORPS_SFIX256,
30339 IX86_BUILTIN_CEILPS_SFIX256,
30340 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
30341
30342 IX86_BUILTIN_UNPCKHPD256,
30343 IX86_BUILTIN_UNPCKLPD256,
30344 IX86_BUILTIN_UNPCKHPS256,
30345 IX86_BUILTIN_UNPCKLPS256,
30346
30347 IX86_BUILTIN_SI256_SI,
30348 IX86_BUILTIN_PS256_PS,
30349 IX86_BUILTIN_PD256_PD,
30350 IX86_BUILTIN_SI_SI256,
30351 IX86_BUILTIN_PS_PS256,
30352 IX86_BUILTIN_PD_PD256,
30353
30354 IX86_BUILTIN_VTESTZPD,
30355 IX86_BUILTIN_VTESTCPD,
30356 IX86_BUILTIN_VTESTNZCPD,
30357 IX86_BUILTIN_VTESTZPS,
30358 IX86_BUILTIN_VTESTCPS,
30359 IX86_BUILTIN_VTESTNZCPS,
30360 IX86_BUILTIN_VTESTZPD256,
30361 IX86_BUILTIN_VTESTCPD256,
30362 IX86_BUILTIN_VTESTNZCPD256,
30363 IX86_BUILTIN_VTESTZPS256,
30364 IX86_BUILTIN_VTESTCPS256,
30365 IX86_BUILTIN_VTESTNZCPS256,
30366 IX86_BUILTIN_PTESTZ256,
30367 IX86_BUILTIN_PTESTC256,
30368 IX86_BUILTIN_PTESTNZC256,
30369
30370 IX86_BUILTIN_MOVMSKPD256,
30371 IX86_BUILTIN_MOVMSKPS256,
30372
30373 /* AVX2 */
30374 IX86_BUILTIN_MPSADBW256,
30375 IX86_BUILTIN_PABSB256,
30376 IX86_BUILTIN_PABSW256,
30377 IX86_BUILTIN_PABSD256,
30378 IX86_BUILTIN_PACKSSDW256,
30379 IX86_BUILTIN_PACKSSWB256,
30380 IX86_BUILTIN_PACKUSDW256,
30381 IX86_BUILTIN_PACKUSWB256,
30382 IX86_BUILTIN_PADDB256,
30383 IX86_BUILTIN_PADDW256,
30384 IX86_BUILTIN_PADDD256,
30385 IX86_BUILTIN_PADDQ256,
30386 IX86_BUILTIN_PADDSB256,
30387 IX86_BUILTIN_PADDSW256,
30388 IX86_BUILTIN_PADDUSB256,
30389 IX86_BUILTIN_PADDUSW256,
30390 IX86_BUILTIN_PALIGNR256,
30391 IX86_BUILTIN_AND256I,
30392 IX86_BUILTIN_ANDNOT256I,
30393 IX86_BUILTIN_PAVGB256,
30394 IX86_BUILTIN_PAVGW256,
30395 IX86_BUILTIN_PBLENDVB256,
30396 IX86_BUILTIN_PBLENDVW256,
30397 IX86_BUILTIN_PCMPEQB256,
30398 IX86_BUILTIN_PCMPEQW256,
30399 IX86_BUILTIN_PCMPEQD256,
30400 IX86_BUILTIN_PCMPEQQ256,
30401 IX86_BUILTIN_PCMPGTB256,
30402 IX86_BUILTIN_PCMPGTW256,
30403 IX86_BUILTIN_PCMPGTD256,
30404 IX86_BUILTIN_PCMPGTQ256,
30405 IX86_BUILTIN_PHADDW256,
30406 IX86_BUILTIN_PHADDD256,
30407 IX86_BUILTIN_PHADDSW256,
30408 IX86_BUILTIN_PHSUBW256,
30409 IX86_BUILTIN_PHSUBD256,
30410 IX86_BUILTIN_PHSUBSW256,
30411 IX86_BUILTIN_PMADDUBSW256,
30412 IX86_BUILTIN_PMADDWD256,
30413 IX86_BUILTIN_PMAXSB256,
30414 IX86_BUILTIN_PMAXSW256,
30415 IX86_BUILTIN_PMAXSD256,
30416 IX86_BUILTIN_PMAXUB256,
30417 IX86_BUILTIN_PMAXUW256,
30418 IX86_BUILTIN_PMAXUD256,
30419 IX86_BUILTIN_PMINSB256,
30420 IX86_BUILTIN_PMINSW256,
30421 IX86_BUILTIN_PMINSD256,
30422 IX86_BUILTIN_PMINUB256,
30423 IX86_BUILTIN_PMINUW256,
30424 IX86_BUILTIN_PMINUD256,
30425 IX86_BUILTIN_PMOVMSKB256,
30426 IX86_BUILTIN_PMOVSXBW256,
30427 IX86_BUILTIN_PMOVSXBD256,
30428 IX86_BUILTIN_PMOVSXBQ256,
30429 IX86_BUILTIN_PMOVSXWD256,
30430 IX86_BUILTIN_PMOVSXWQ256,
30431 IX86_BUILTIN_PMOVSXDQ256,
30432 IX86_BUILTIN_PMOVZXBW256,
30433 IX86_BUILTIN_PMOVZXBD256,
30434 IX86_BUILTIN_PMOVZXBQ256,
30435 IX86_BUILTIN_PMOVZXWD256,
30436 IX86_BUILTIN_PMOVZXWQ256,
30437 IX86_BUILTIN_PMOVZXDQ256,
30438 IX86_BUILTIN_PMULDQ256,
30439 IX86_BUILTIN_PMULHRSW256,
30440 IX86_BUILTIN_PMULHUW256,
30441 IX86_BUILTIN_PMULHW256,
30442 IX86_BUILTIN_PMULLW256,
30443 IX86_BUILTIN_PMULLD256,
30444 IX86_BUILTIN_PMULUDQ256,
30445 IX86_BUILTIN_POR256,
30446 IX86_BUILTIN_PSADBW256,
30447 IX86_BUILTIN_PSHUFB256,
30448 IX86_BUILTIN_PSHUFD256,
30449 IX86_BUILTIN_PSHUFHW256,
30450 IX86_BUILTIN_PSHUFLW256,
30451 IX86_BUILTIN_PSIGNB256,
30452 IX86_BUILTIN_PSIGNW256,
30453 IX86_BUILTIN_PSIGND256,
30454 IX86_BUILTIN_PSLLDQI256,
30455 IX86_BUILTIN_PSLLWI256,
30456 IX86_BUILTIN_PSLLW256,
30457 IX86_BUILTIN_PSLLDI256,
30458 IX86_BUILTIN_PSLLD256,
30459 IX86_BUILTIN_PSLLQI256,
30460 IX86_BUILTIN_PSLLQ256,
30461 IX86_BUILTIN_PSRAWI256,
30462 IX86_BUILTIN_PSRAW256,
30463 IX86_BUILTIN_PSRADI256,
30464 IX86_BUILTIN_PSRAD256,
30465 IX86_BUILTIN_PSRLDQI256,
30466 IX86_BUILTIN_PSRLWI256,
30467 IX86_BUILTIN_PSRLW256,
30468 IX86_BUILTIN_PSRLDI256,
30469 IX86_BUILTIN_PSRLD256,
30470 IX86_BUILTIN_PSRLQI256,
30471 IX86_BUILTIN_PSRLQ256,
30472 IX86_BUILTIN_PSUBB256,
30473 IX86_BUILTIN_PSUBW256,
30474 IX86_BUILTIN_PSUBD256,
30475 IX86_BUILTIN_PSUBQ256,
30476 IX86_BUILTIN_PSUBSB256,
30477 IX86_BUILTIN_PSUBSW256,
30478 IX86_BUILTIN_PSUBUSB256,
30479 IX86_BUILTIN_PSUBUSW256,
30480 IX86_BUILTIN_PUNPCKHBW256,
30481 IX86_BUILTIN_PUNPCKHWD256,
30482 IX86_BUILTIN_PUNPCKHDQ256,
30483 IX86_BUILTIN_PUNPCKHQDQ256,
30484 IX86_BUILTIN_PUNPCKLBW256,
30485 IX86_BUILTIN_PUNPCKLWD256,
30486 IX86_BUILTIN_PUNPCKLDQ256,
30487 IX86_BUILTIN_PUNPCKLQDQ256,
30488 IX86_BUILTIN_PXOR256,
30489 IX86_BUILTIN_MOVNTDQA256,
30490 IX86_BUILTIN_VBROADCASTSS_PS,
30491 IX86_BUILTIN_VBROADCASTSS_PS256,
30492 IX86_BUILTIN_VBROADCASTSD_PD256,
30493 IX86_BUILTIN_VBROADCASTSI256,
30494 IX86_BUILTIN_PBLENDD256,
30495 IX86_BUILTIN_PBLENDD128,
30496 IX86_BUILTIN_PBROADCASTB256,
30497 IX86_BUILTIN_PBROADCASTW256,
30498 IX86_BUILTIN_PBROADCASTD256,
30499 IX86_BUILTIN_PBROADCASTQ256,
30500 IX86_BUILTIN_PBROADCASTB128,
30501 IX86_BUILTIN_PBROADCASTW128,
30502 IX86_BUILTIN_PBROADCASTD128,
30503 IX86_BUILTIN_PBROADCASTQ128,
30504 IX86_BUILTIN_VPERMVARSI256,
30505 IX86_BUILTIN_VPERMDF256,
30506 IX86_BUILTIN_VPERMVARSF256,
30507 IX86_BUILTIN_VPERMDI256,
30508 IX86_BUILTIN_VPERMTI256,
30509 IX86_BUILTIN_VEXTRACT128I256,
30510 IX86_BUILTIN_VINSERT128I256,
30511 IX86_BUILTIN_MASKLOADD,
30512 IX86_BUILTIN_MASKLOADQ,
30513 IX86_BUILTIN_MASKLOADD256,
30514 IX86_BUILTIN_MASKLOADQ256,
30515 IX86_BUILTIN_MASKSTORED,
30516 IX86_BUILTIN_MASKSTOREQ,
30517 IX86_BUILTIN_MASKSTORED256,
30518 IX86_BUILTIN_MASKSTOREQ256,
30519 IX86_BUILTIN_PSLLVV4DI,
30520 IX86_BUILTIN_PSLLVV2DI,
30521 IX86_BUILTIN_PSLLVV8SI,
30522 IX86_BUILTIN_PSLLVV4SI,
30523 IX86_BUILTIN_PSRAVV8SI,
30524 IX86_BUILTIN_PSRAVV4SI,
30525 IX86_BUILTIN_PSRLVV4DI,
30526 IX86_BUILTIN_PSRLVV2DI,
30527 IX86_BUILTIN_PSRLVV8SI,
30528 IX86_BUILTIN_PSRLVV4SI,
30529
30530 IX86_BUILTIN_GATHERSIV2DF,
30531 IX86_BUILTIN_GATHERSIV4DF,
30532 IX86_BUILTIN_GATHERDIV2DF,
30533 IX86_BUILTIN_GATHERDIV4DF,
30534 IX86_BUILTIN_GATHERSIV4SF,
30535 IX86_BUILTIN_GATHERSIV8SF,
30536 IX86_BUILTIN_GATHERDIV4SF,
30537 IX86_BUILTIN_GATHERDIV8SF,
30538 IX86_BUILTIN_GATHERSIV2DI,
30539 IX86_BUILTIN_GATHERSIV4DI,
30540 IX86_BUILTIN_GATHERDIV2DI,
30541 IX86_BUILTIN_GATHERDIV4DI,
30542 IX86_BUILTIN_GATHERSIV4SI,
30543 IX86_BUILTIN_GATHERSIV8SI,
30544 IX86_BUILTIN_GATHERDIV4SI,
30545 IX86_BUILTIN_GATHERDIV8SI,
30546
30547 /* AVX512F */
30548 IX86_BUILTIN_SI512_SI256,
30549 IX86_BUILTIN_PD512_PD256,
30550 IX86_BUILTIN_PS512_PS256,
30551 IX86_BUILTIN_SI512_SI,
30552 IX86_BUILTIN_PD512_PD,
30553 IX86_BUILTIN_PS512_PS,
30554 IX86_BUILTIN_ADDPD512,
30555 IX86_BUILTIN_ADDPS512,
30556 IX86_BUILTIN_ADDSD_ROUND,
30557 IX86_BUILTIN_ADDSS_ROUND,
30558 IX86_BUILTIN_ALIGND512,
30559 IX86_BUILTIN_ALIGNQ512,
30560 IX86_BUILTIN_BLENDMD512,
30561 IX86_BUILTIN_BLENDMPD512,
30562 IX86_BUILTIN_BLENDMPS512,
30563 IX86_BUILTIN_BLENDMQ512,
30564 IX86_BUILTIN_BROADCASTF32X4_512,
30565 IX86_BUILTIN_BROADCASTF64X4_512,
30566 IX86_BUILTIN_BROADCASTI32X4_512,
30567 IX86_BUILTIN_BROADCASTI64X4_512,
30568 IX86_BUILTIN_BROADCASTSD512,
30569 IX86_BUILTIN_BROADCASTSS512,
30570 IX86_BUILTIN_CMPD512,
30571 IX86_BUILTIN_CMPPD512,
30572 IX86_BUILTIN_CMPPS512,
30573 IX86_BUILTIN_CMPQ512,
30574 IX86_BUILTIN_CMPSD_MASK,
30575 IX86_BUILTIN_CMPSS_MASK,
30576 IX86_BUILTIN_COMIDF,
30577 IX86_BUILTIN_COMISF,
30578 IX86_BUILTIN_COMPRESSPD512,
30579 IX86_BUILTIN_COMPRESSPDSTORE512,
30580 IX86_BUILTIN_COMPRESSPS512,
30581 IX86_BUILTIN_COMPRESSPSSTORE512,
30582 IX86_BUILTIN_CVTDQ2PD512,
30583 IX86_BUILTIN_CVTDQ2PS512,
30584 IX86_BUILTIN_CVTPD2DQ512,
30585 IX86_BUILTIN_CVTPD2PS512,
30586 IX86_BUILTIN_CVTPD2UDQ512,
30587 IX86_BUILTIN_CVTPH2PS512,
30588 IX86_BUILTIN_CVTPS2DQ512,
30589 IX86_BUILTIN_CVTPS2PD512,
30590 IX86_BUILTIN_CVTPS2PH512,
30591 IX86_BUILTIN_CVTPS2UDQ512,
30592 IX86_BUILTIN_CVTSD2SS_ROUND,
30593 IX86_BUILTIN_CVTSI2SD64,
30594 IX86_BUILTIN_CVTSI2SS32,
30595 IX86_BUILTIN_CVTSI2SS64,
30596 IX86_BUILTIN_CVTSS2SD_ROUND,
30597 IX86_BUILTIN_CVTTPD2DQ512,
30598 IX86_BUILTIN_CVTTPD2UDQ512,
30599 IX86_BUILTIN_CVTTPS2DQ512,
30600 IX86_BUILTIN_CVTTPS2UDQ512,
30601 IX86_BUILTIN_CVTUDQ2PD512,
30602 IX86_BUILTIN_CVTUDQ2PS512,
30603 IX86_BUILTIN_CVTUSI2SD32,
30604 IX86_BUILTIN_CVTUSI2SD64,
30605 IX86_BUILTIN_CVTUSI2SS32,
30606 IX86_BUILTIN_CVTUSI2SS64,
30607 IX86_BUILTIN_DIVPD512,
30608 IX86_BUILTIN_DIVPS512,
30609 IX86_BUILTIN_DIVSD_ROUND,
30610 IX86_BUILTIN_DIVSS_ROUND,
30611 IX86_BUILTIN_EXPANDPD512,
30612 IX86_BUILTIN_EXPANDPD512Z,
30613 IX86_BUILTIN_EXPANDPDLOAD512,
30614 IX86_BUILTIN_EXPANDPDLOAD512Z,
30615 IX86_BUILTIN_EXPANDPS512,
30616 IX86_BUILTIN_EXPANDPS512Z,
30617 IX86_BUILTIN_EXPANDPSLOAD512,
30618 IX86_BUILTIN_EXPANDPSLOAD512Z,
30619 IX86_BUILTIN_EXTRACTF32X4,
30620 IX86_BUILTIN_EXTRACTF64X4,
30621 IX86_BUILTIN_EXTRACTI32X4,
30622 IX86_BUILTIN_EXTRACTI64X4,
30623 IX86_BUILTIN_FIXUPIMMPD512_MASK,
30624 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
30625 IX86_BUILTIN_FIXUPIMMPS512_MASK,
30626 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
30627 IX86_BUILTIN_FIXUPIMMSD128_MASK,
30628 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
30629 IX86_BUILTIN_FIXUPIMMSS128_MASK,
30630 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
30631 IX86_BUILTIN_GETEXPPD512,
30632 IX86_BUILTIN_GETEXPPS512,
30633 IX86_BUILTIN_GETEXPSD128,
30634 IX86_BUILTIN_GETEXPSS128,
30635 IX86_BUILTIN_GETMANTPD512,
30636 IX86_BUILTIN_GETMANTPS512,
30637 IX86_BUILTIN_GETMANTSD128,
30638 IX86_BUILTIN_GETMANTSS128,
30639 IX86_BUILTIN_INSERTF32X4,
30640 IX86_BUILTIN_INSERTF64X4,
30641 IX86_BUILTIN_INSERTI32X4,
30642 IX86_BUILTIN_INSERTI64X4,
30643 IX86_BUILTIN_LOADAPD512,
30644 IX86_BUILTIN_LOADAPS512,
30645 IX86_BUILTIN_LOADDQUDI512,
30646 IX86_BUILTIN_LOADDQUSI512,
30647 IX86_BUILTIN_LOADUPD512,
30648 IX86_BUILTIN_LOADUPS512,
30649 IX86_BUILTIN_MAXPD512,
30650 IX86_BUILTIN_MAXPS512,
30651 IX86_BUILTIN_MAXSD_ROUND,
30652 IX86_BUILTIN_MAXSS_ROUND,
30653 IX86_BUILTIN_MINPD512,
30654 IX86_BUILTIN_MINPS512,
30655 IX86_BUILTIN_MINSD_ROUND,
30656 IX86_BUILTIN_MINSS_ROUND,
30657 IX86_BUILTIN_MOVAPD512,
30658 IX86_BUILTIN_MOVAPS512,
30659 IX86_BUILTIN_MOVDDUP512,
30660 IX86_BUILTIN_MOVDQA32LOAD512,
30661 IX86_BUILTIN_MOVDQA32STORE512,
30662 IX86_BUILTIN_MOVDQA32_512,
30663 IX86_BUILTIN_MOVDQA64LOAD512,
30664 IX86_BUILTIN_MOVDQA64STORE512,
30665 IX86_BUILTIN_MOVDQA64_512,
30666 IX86_BUILTIN_MOVNTDQ512,
30667 IX86_BUILTIN_MOVNTDQA512,
30668 IX86_BUILTIN_MOVNTPD512,
30669 IX86_BUILTIN_MOVNTPS512,
30670 IX86_BUILTIN_MOVSHDUP512,
30671 IX86_BUILTIN_MOVSLDUP512,
30672 IX86_BUILTIN_MULPD512,
30673 IX86_BUILTIN_MULPS512,
30674 IX86_BUILTIN_MULSD_ROUND,
30675 IX86_BUILTIN_MULSS_ROUND,
30676 IX86_BUILTIN_PABSD512,
30677 IX86_BUILTIN_PABSQ512,
30678 IX86_BUILTIN_PADDD512,
30679 IX86_BUILTIN_PADDQ512,
30680 IX86_BUILTIN_PANDD512,
30681 IX86_BUILTIN_PANDND512,
30682 IX86_BUILTIN_PANDNQ512,
30683 IX86_BUILTIN_PANDQ512,
30684 IX86_BUILTIN_PBROADCASTD512,
30685 IX86_BUILTIN_PBROADCASTD512_GPR,
30686 IX86_BUILTIN_PBROADCASTMB512,
30687 IX86_BUILTIN_PBROADCASTMW512,
30688 IX86_BUILTIN_PBROADCASTQ512,
30689 IX86_BUILTIN_PBROADCASTQ512_GPR,
30690 IX86_BUILTIN_PCMPEQD512_MASK,
30691 IX86_BUILTIN_PCMPEQQ512_MASK,
30692 IX86_BUILTIN_PCMPGTD512_MASK,
30693 IX86_BUILTIN_PCMPGTQ512_MASK,
30694 IX86_BUILTIN_PCOMPRESSD512,
30695 IX86_BUILTIN_PCOMPRESSDSTORE512,
30696 IX86_BUILTIN_PCOMPRESSQ512,
30697 IX86_BUILTIN_PCOMPRESSQSTORE512,
30698 IX86_BUILTIN_PEXPANDD512,
30699 IX86_BUILTIN_PEXPANDD512Z,
30700 IX86_BUILTIN_PEXPANDDLOAD512,
30701 IX86_BUILTIN_PEXPANDDLOAD512Z,
30702 IX86_BUILTIN_PEXPANDQ512,
30703 IX86_BUILTIN_PEXPANDQ512Z,
30704 IX86_BUILTIN_PEXPANDQLOAD512,
30705 IX86_BUILTIN_PEXPANDQLOAD512Z,
30706 IX86_BUILTIN_PMAXSD512,
30707 IX86_BUILTIN_PMAXSQ512,
30708 IX86_BUILTIN_PMAXUD512,
30709 IX86_BUILTIN_PMAXUQ512,
30710 IX86_BUILTIN_PMINSD512,
30711 IX86_BUILTIN_PMINSQ512,
30712 IX86_BUILTIN_PMINUD512,
30713 IX86_BUILTIN_PMINUQ512,
30714 IX86_BUILTIN_PMOVDB512,
30715 IX86_BUILTIN_PMOVDB512_MEM,
30716 IX86_BUILTIN_PMOVDW512,
30717 IX86_BUILTIN_PMOVDW512_MEM,
30718 IX86_BUILTIN_PMOVQB512,
30719 IX86_BUILTIN_PMOVQB512_MEM,
30720 IX86_BUILTIN_PMOVQD512,
30721 IX86_BUILTIN_PMOVQD512_MEM,
30722 IX86_BUILTIN_PMOVQW512,
30723 IX86_BUILTIN_PMOVQW512_MEM,
30724 IX86_BUILTIN_PMOVSDB512,
30725 IX86_BUILTIN_PMOVSDB512_MEM,
30726 IX86_BUILTIN_PMOVSDW512,
30727 IX86_BUILTIN_PMOVSDW512_MEM,
30728 IX86_BUILTIN_PMOVSQB512,
30729 IX86_BUILTIN_PMOVSQB512_MEM,
30730 IX86_BUILTIN_PMOVSQD512,
30731 IX86_BUILTIN_PMOVSQD512_MEM,
30732 IX86_BUILTIN_PMOVSQW512,
30733 IX86_BUILTIN_PMOVSQW512_MEM,
30734 IX86_BUILTIN_PMOVSXBD512,
30735 IX86_BUILTIN_PMOVSXBQ512,
30736 IX86_BUILTIN_PMOVSXDQ512,
30737 IX86_BUILTIN_PMOVSXWD512,
30738 IX86_BUILTIN_PMOVSXWQ512,
30739 IX86_BUILTIN_PMOVUSDB512,
30740 IX86_BUILTIN_PMOVUSDB512_MEM,
30741 IX86_BUILTIN_PMOVUSDW512,
30742 IX86_BUILTIN_PMOVUSDW512_MEM,
30743 IX86_BUILTIN_PMOVUSQB512,
30744 IX86_BUILTIN_PMOVUSQB512_MEM,
30745 IX86_BUILTIN_PMOVUSQD512,
30746 IX86_BUILTIN_PMOVUSQD512_MEM,
30747 IX86_BUILTIN_PMOVUSQW512,
30748 IX86_BUILTIN_PMOVUSQW512_MEM,
30749 IX86_BUILTIN_PMOVZXBD512,
30750 IX86_BUILTIN_PMOVZXBQ512,
30751 IX86_BUILTIN_PMOVZXDQ512,
30752 IX86_BUILTIN_PMOVZXWD512,
30753 IX86_BUILTIN_PMOVZXWQ512,
30754 IX86_BUILTIN_PMULDQ512,
30755 IX86_BUILTIN_PMULLD512,
30756 IX86_BUILTIN_PMULUDQ512,
30757 IX86_BUILTIN_PORD512,
30758 IX86_BUILTIN_PORQ512,
30759 IX86_BUILTIN_PROLD512,
30760 IX86_BUILTIN_PROLQ512,
30761 IX86_BUILTIN_PROLVD512,
30762 IX86_BUILTIN_PROLVQ512,
30763 IX86_BUILTIN_PRORD512,
30764 IX86_BUILTIN_PRORQ512,
30765 IX86_BUILTIN_PRORVD512,
30766 IX86_BUILTIN_PRORVQ512,
30767 IX86_BUILTIN_PSHUFD512,
30768 IX86_BUILTIN_PSLLD512,
30769 IX86_BUILTIN_PSLLDI512,
30770 IX86_BUILTIN_PSLLQ512,
30771 IX86_BUILTIN_PSLLQI512,
30772 IX86_BUILTIN_PSLLVV16SI,
30773 IX86_BUILTIN_PSLLVV8DI,
30774 IX86_BUILTIN_PSRAD512,
30775 IX86_BUILTIN_PSRADI512,
30776 IX86_BUILTIN_PSRAQ512,
30777 IX86_BUILTIN_PSRAQI512,
30778 IX86_BUILTIN_PSRAVV16SI,
30779 IX86_BUILTIN_PSRAVV8DI,
30780 IX86_BUILTIN_PSRLD512,
30781 IX86_BUILTIN_PSRLDI512,
30782 IX86_BUILTIN_PSRLQ512,
30783 IX86_BUILTIN_PSRLQI512,
30784 IX86_BUILTIN_PSRLVV16SI,
30785 IX86_BUILTIN_PSRLVV8DI,
30786 IX86_BUILTIN_PSUBD512,
30787 IX86_BUILTIN_PSUBQ512,
30788 IX86_BUILTIN_PTESTMD512,
30789 IX86_BUILTIN_PTESTMQ512,
30790 IX86_BUILTIN_PTESTNMD512,
30791 IX86_BUILTIN_PTESTNMQ512,
30792 IX86_BUILTIN_PUNPCKHDQ512,
30793 IX86_BUILTIN_PUNPCKHQDQ512,
30794 IX86_BUILTIN_PUNPCKLDQ512,
30795 IX86_BUILTIN_PUNPCKLQDQ512,
30796 IX86_BUILTIN_PXORD512,
30797 IX86_BUILTIN_PXORQ512,
30798 IX86_BUILTIN_RCP14PD512,
30799 IX86_BUILTIN_RCP14PS512,
30800 IX86_BUILTIN_RCP14SD,
30801 IX86_BUILTIN_RCP14SS,
30802 IX86_BUILTIN_RNDSCALEPD,
30803 IX86_BUILTIN_RNDSCALEPS,
30804 IX86_BUILTIN_RNDSCALESD,
30805 IX86_BUILTIN_RNDSCALESS,
30806 IX86_BUILTIN_RSQRT14PD512,
30807 IX86_BUILTIN_RSQRT14PS512,
30808 IX86_BUILTIN_RSQRT14SD,
30809 IX86_BUILTIN_RSQRT14SS,
30810 IX86_BUILTIN_SCALEFPD512,
30811 IX86_BUILTIN_SCALEFPS512,
30812 IX86_BUILTIN_SCALEFSD,
30813 IX86_BUILTIN_SCALEFSS,
30814 IX86_BUILTIN_SHUFPD512,
30815 IX86_BUILTIN_SHUFPS512,
30816 IX86_BUILTIN_SHUF_F32x4,
30817 IX86_BUILTIN_SHUF_F64x2,
30818 IX86_BUILTIN_SHUF_I32x4,
30819 IX86_BUILTIN_SHUF_I64x2,
30820 IX86_BUILTIN_SQRTPD512,
30821 IX86_BUILTIN_SQRTPD512_MASK,
30822 IX86_BUILTIN_SQRTPS512_MASK,
30823 IX86_BUILTIN_SQRTPS_NR512,
30824 IX86_BUILTIN_SQRTSD_ROUND,
30825 IX86_BUILTIN_SQRTSS_ROUND,
30826 IX86_BUILTIN_STOREAPD512,
30827 IX86_BUILTIN_STOREAPS512,
30828 IX86_BUILTIN_STOREDQUDI512,
30829 IX86_BUILTIN_STOREDQUSI512,
30830 IX86_BUILTIN_STOREUPD512,
30831 IX86_BUILTIN_STOREUPS512,
30832 IX86_BUILTIN_SUBPD512,
30833 IX86_BUILTIN_SUBPS512,
30834 IX86_BUILTIN_SUBSD_ROUND,
30835 IX86_BUILTIN_SUBSS_ROUND,
30836 IX86_BUILTIN_UCMPD512,
30837 IX86_BUILTIN_UCMPQ512,
30838 IX86_BUILTIN_UNPCKHPD512,
30839 IX86_BUILTIN_UNPCKHPS512,
30840 IX86_BUILTIN_UNPCKLPD512,
30841 IX86_BUILTIN_UNPCKLPS512,
30842 IX86_BUILTIN_VCVTSD2SI32,
30843 IX86_BUILTIN_VCVTSD2SI64,
30844 IX86_BUILTIN_VCVTSD2USI32,
30845 IX86_BUILTIN_VCVTSD2USI64,
30846 IX86_BUILTIN_VCVTSS2SI32,
30847 IX86_BUILTIN_VCVTSS2SI64,
30848 IX86_BUILTIN_VCVTSS2USI32,
30849 IX86_BUILTIN_VCVTSS2USI64,
30850 IX86_BUILTIN_VCVTTSD2SI32,
30851 IX86_BUILTIN_VCVTTSD2SI64,
30852 IX86_BUILTIN_VCVTTSD2USI32,
30853 IX86_BUILTIN_VCVTTSD2USI64,
30854 IX86_BUILTIN_VCVTTSS2SI32,
30855 IX86_BUILTIN_VCVTTSS2SI64,
30856 IX86_BUILTIN_VCVTTSS2USI32,
30857 IX86_BUILTIN_VCVTTSS2USI64,
30858 IX86_BUILTIN_VFMADDPD512_MASK,
30859 IX86_BUILTIN_VFMADDPD512_MASK3,
30860 IX86_BUILTIN_VFMADDPD512_MASKZ,
30861 IX86_BUILTIN_VFMADDPS512_MASK,
30862 IX86_BUILTIN_VFMADDPS512_MASK3,
30863 IX86_BUILTIN_VFMADDPS512_MASKZ,
30864 IX86_BUILTIN_VFMADDSD3_ROUND,
30865 IX86_BUILTIN_VFMADDSS3_ROUND,
30866 IX86_BUILTIN_VFMADDSUBPD512_MASK,
30867 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
30868 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
30869 IX86_BUILTIN_VFMADDSUBPS512_MASK,
30870 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
30871 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
30872 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
30873 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
30874 IX86_BUILTIN_VFMSUBPD512_MASK3,
30875 IX86_BUILTIN_VFMSUBPS512_MASK3,
30876 IX86_BUILTIN_VFMSUBSD3_MASK3,
30877 IX86_BUILTIN_VFMSUBSS3_MASK3,
30878 IX86_BUILTIN_VFNMADDPD512_MASK,
30879 IX86_BUILTIN_VFNMADDPS512_MASK,
30880 IX86_BUILTIN_VFNMSUBPD512_MASK,
30881 IX86_BUILTIN_VFNMSUBPD512_MASK3,
30882 IX86_BUILTIN_VFNMSUBPS512_MASK,
30883 IX86_BUILTIN_VFNMSUBPS512_MASK3,
30884 IX86_BUILTIN_VPCLZCNTD512,
30885 IX86_BUILTIN_VPCLZCNTQ512,
30886 IX86_BUILTIN_VPCONFLICTD512,
30887 IX86_BUILTIN_VPCONFLICTQ512,
30888 IX86_BUILTIN_VPERMDF512,
30889 IX86_BUILTIN_VPERMDI512,
30890 IX86_BUILTIN_VPERMI2VARD512,
30891 IX86_BUILTIN_VPERMI2VARPD512,
30892 IX86_BUILTIN_VPERMI2VARPS512,
30893 IX86_BUILTIN_VPERMI2VARQ512,
30894 IX86_BUILTIN_VPERMILPD512,
30895 IX86_BUILTIN_VPERMILPS512,
30896 IX86_BUILTIN_VPERMILVARPD512,
30897 IX86_BUILTIN_VPERMILVARPS512,
30898 IX86_BUILTIN_VPERMT2VARD512,
30899 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
30900 IX86_BUILTIN_VPERMT2VARPD512,
30901 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
30902 IX86_BUILTIN_VPERMT2VARPS512,
30903 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
30904 IX86_BUILTIN_VPERMT2VARQ512,
30905 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
30906 IX86_BUILTIN_VPERMVARDF512,
30907 IX86_BUILTIN_VPERMVARDI512,
30908 IX86_BUILTIN_VPERMVARSF512,
30909 IX86_BUILTIN_VPERMVARSI512,
30910 IX86_BUILTIN_VTERNLOGD512_MASK,
30911 IX86_BUILTIN_VTERNLOGD512_MASKZ,
30912 IX86_BUILTIN_VTERNLOGQ512_MASK,
30913 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
30914
30915 /* Mask arithmetic operations */
30916 IX86_BUILTIN_KAND16,
30917 IX86_BUILTIN_KANDN16,
30918 IX86_BUILTIN_KNOT16,
30919 IX86_BUILTIN_KOR16,
30920 IX86_BUILTIN_KORTESTC16,
30921 IX86_BUILTIN_KORTESTZ16,
30922 IX86_BUILTIN_KUNPCKBW,
30923 IX86_BUILTIN_KXNOR16,
30924 IX86_BUILTIN_KXOR16,
30925 IX86_BUILTIN_KMOV16,
30926
30927 /* AVX512VL. */
30928 IX86_BUILTIN_PMOVUSQD256_MEM,
30929 IX86_BUILTIN_PMOVUSQD128_MEM,
30930 IX86_BUILTIN_PMOVSQD256_MEM,
30931 IX86_BUILTIN_PMOVSQD128_MEM,
30932 IX86_BUILTIN_PMOVQD256_MEM,
30933 IX86_BUILTIN_PMOVQD128_MEM,
30934 IX86_BUILTIN_PMOVUSQW256_MEM,
30935 IX86_BUILTIN_PMOVUSQW128_MEM,
30936 IX86_BUILTIN_PMOVSQW256_MEM,
30937 IX86_BUILTIN_PMOVSQW128_MEM,
30938 IX86_BUILTIN_PMOVQW256_MEM,
30939 IX86_BUILTIN_PMOVQW128_MEM,
30940 IX86_BUILTIN_PMOVUSQB256_MEM,
30941 IX86_BUILTIN_PMOVUSQB128_MEM,
30942 IX86_BUILTIN_PMOVSQB256_MEM,
30943 IX86_BUILTIN_PMOVSQB128_MEM,
30944 IX86_BUILTIN_PMOVQB256_MEM,
30945 IX86_BUILTIN_PMOVQB128_MEM,
30946 IX86_BUILTIN_PMOVUSDW256_MEM,
30947 IX86_BUILTIN_PMOVUSDW128_MEM,
30948 IX86_BUILTIN_PMOVSDW256_MEM,
30949 IX86_BUILTIN_PMOVSDW128_MEM,
30950 IX86_BUILTIN_PMOVDW256_MEM,
30951 IX86_BUILTIN_PMOVDW128_MEM,
30952 IX86_BUILTIN_PMOVUSDB256_MEM,
30953 IX86_BUILTIN_PMOVUSDB128_MEM,
30954 IX86_BUILTIN_PMOVSDB256_MEM,
30955 IX86_BUILTIN_PMOVSDB128_MEM,
30956 IX86_BUILTIN_PMOVDB256_MEM,
30957 IX86_BUILTIN_PMOVDB128_MEM,
30958 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
30959 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
30960 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
30961 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
30962 IX86_BUILTIN_MOVDQA64STORE256_MASK,
30963 IX86_BUILTIN_MOVDQA64STORE128_MASK,
30964 IX86_BUILTIN_MOVDQA32STORE256_MASK,
30965 IX86_BUILTIN_MOVDQA32STORE128_MASK,
30966 IX86_BUILTIN_LOADAPD256_MASK,
30967 IX86_BUILTIN_LOADAPD128_MASK,
30968 IX86_BUILTIN_LOADAPS256_MASK,
30969 IX86_BUILTIN_LOADAPS128_MASK,
30970 IX86_BUILTIN_STOREAPD256_MASK,
30971 IX86_BUILTIN_STOREAPD128_MASK,
30972 IX86_BUILTIN_STOREAPS256_MASK,
30973 IX86_BUILTIN_STOREAPS128_MASK,
30974 IX86_BUILTIN_LOADUPD256_MASK,
30975 IX86_BUILTIN_LOADUPD128_MASK,
30976 IX86_BUILTIN_LOADUPS256_MASK,
30977 IX86_BUILTIN_LOADUPS128_MASK,
30978 IX86_BUILTIN_STOREUPD256_MASK,
30979 IX86_BUILTIN_STOREUPD128_MASK,
30980 IX86_BUILTIN_STOREUPS256_MASK,
30981 IX86_BUILTIN_STOREUPS128_MASK,
30982 IX86_BUILTIN_LOADDQUDI256_MASK,
30983 IX86_BUILTIN_LOADDQUDI128_MASK,
30984 IX86_BUILTIN_LOADDQUSI256_MASK,
30985 IX86_BUILTIN_LOADDQUSI128_MASK,
30986 IX86_BUILTIN_LOADDQUHI256_MASK,
30987 IX86_BUILTIN_LOADDQUHI128_MASK,
30988 IX86_BUILTIN_LOADDQUQI256_MASK,
30989 IX86_BUILTIN_LOADDQUQI128_MASK,
30990 IX86_BUILTIN_STOREDQUDI256_MASK,
30991 IX86_BUILTIN_STOREDQUDI128_MASK,
30992 IX86_BUILTIN_STOREDQUSI256_MASK,
30993 IX86_BUILTIN_STOREDQUSI128_MASK,
30994 IX86_BUILTIN_STOREDQUHI256_MASK,
30995 IX86_BUILTIN_STOREDQUHI128_MASK,
30996 IX86_BUILTIN_STOREDQUQI256_MASK,
30997 IX86_BUILTIN_STOREDQUQI128_MASK,
30998 IX86_BUILTIN_COMPRESSPDSTORE256,
30999 IX86_BUILTIN_COMPRESSPDSTORE128,
31000 IX86_BUILTIN_COMPRESSPSSTORE256,
31001 IX86_BUILTIN_COMPRESSPSSTORE128,
31002 IX86_BUILTIN_PCOMPRESSQSTORE256,
31003 IX86_BUILTIN_PCOMPRESSQSTORE128,
31004 IX86_BUILTIN_PCOMPRESSDSTORE256,
31005 IX86_BUILTIN_PCOMPRESSDSTORE128,
31006 IX86_BUILTIN_EXPANDPDLOAD256,
31007 IX86_BUILTIN_EXPANDPDLOAD128,
31008 IX86_BUILTIN_EXPANDPSLOAD256,
31009 IX86_BUILTIN_EXPANDPSLOAD128,
31010 IX86_BUILTIN_PEXPANDQLOAD256,
31011 IX86_BUILTIN_PEXPANDQLOAD128,
31012 IX86_BUILTIN_PEXPANDDLOAD256,
31013 IX86_BUILTIN_PEXPANDDLOAD128,
31014 IX86_BUILTIN_EXPANDPDLOAD256Z,
31015 IX86_BUILTIN_EXPANDPDLOAD128Z,
31016 IX86_BUILTIN_EXPANDPSLOAD256Z,
31017 IX86_BUILTIN_EXPANDPSLOAD128Z,
31018 IX86_BUILTIN_PEXPANDQLOAD256Z,
31019 IX86_BUILTIN_PEXPANDQLOAD128Z,
31020 IX86_BUILTIN_PEXPANDDLOAD256Z,
31021 IX86_BUILTIN_PEXPANDDLOAD128Z,
31022 IX86_BUILTIN_PALIGNR256_MASK,
31023 IX86_BUILTIN_PALIGNR128_MASK,
31024 IX86_BUILTIN_MOVDQA64_256_MASK,
31025 IX86_BUILTIN_MOVDQA64_128_MASK,
31026 IX86_BUILTIN_MOVDQA32_256_MASK,
31027 IX86_BUILTIN_MOVDQA32_128_MASK,
31028 IX86_BUILTIN_MOVAPD256_MASK,
31029 IX86_BUILTIN_MOVAPD128_MASK,
31030 IX86_BUILTIN_MOVAPS256_MASK,
31031 IX86_BUILTIN_MOVAPS128_MASK,
31032 IX86_BUILTIN_MOVDQUHI256_MASK,
31033 IX86_BUILTIN_MOVDQUHI128_MASK,
31034 IX86_BUILTIN_MOVDQUQI256_MASK,
31035 IX86_BUILTIN_MOVDQUQI128_MASK,
31036 IX86_BUILTIN_MINPS128_MASK,
31037 IX86_BUILTIN_MAXPS128_MASK,
31038 IX86_BUILTIN_MINPD128_MASK,
31039 IX86_BUILTIN_MAXPD128_MASK,
31040 IX86_BUILTIN_MAXPD256_MASK,
31041 IX86_BUILTIN_MAXPS256_MASK,
31042 IX86_BUILTIN_MINPD256_MASK,
31043 IX86_BUILTIN_MINPS256_MASK,
31044 IX86_BUILTIN_MULPS128_MASK,
31045 IX86_BUILTIN_DIVPS128_MASK,
31046 IX86_BUILTIN_MULPD128_MASK,
31047 IX86_BUILTIN_DIVPD128_MASK,
31048 IX86_BUILTIN_DIVPD256_MASK,
31049 IX86_BUILTIN_DIVPS256_MASK,
31050 IX86_BUILTIN_MULPD256_MASK,
31051 IX86_BUILTIN_MULPS256_MASK,
31052 IX86_BUILTIN_ADDPD128_MASK,
31053 IX86_BUILTIN_ADDPD256_MASK,
31054 IX86_BUILTIN_ADDPS128_MASK,
31055 IX86_BUILTIN_ADDPS256_MASK,
31056 IX86_BUILTIN_SUBPD128_MASK,
31057 IX86_BUILTIN_SUBPD256_MASK,
31058 IX86_BUILTIN_SUBPS128_MASK,
31059 IX86_BUILTIN_SUBPS256_MASK,
31060 IX86_BUILTIN_XORPD256_MASK,
31061 IX86_BUILTIN_XORPD128_MASK,
31062 IX86_BUILTIN_XORPS256_MASK,
31063 IX86_BUILTIN_XORPS128_MASK,
31064 IX86_BUILTIN_ORPD256_MASK,
31065 IX86_BUILTIN_ORPD128_MASK,
31066 IX86_BUILTIN_ORPS256_MASK,
31067 IX86_BUILTIN_ORPS128_MASK,
31068 IX86_BUILTIN_BROADCASTF32x2_256,
31069 IX86_BUILTIN_BROADCASTI32x2_256,
31070 IX86_BUILTIN_BROADCASTI32x2_128,
31071 IX86_BUILTIN_BROADCASTF64X2_256,
31072 IX86_BUILTIN_BROADCASTI64X2_256,
31073 IX86_BUILTIN_BROADCASTF32X4_256,
31074 IX86_BUILTIN_BROADCASTI32X4_256,
31075 IX86_BUILTIN_EXTRACTF32X4_256,
31076 IX86_BUILTIN_EXTRACTI32X4_256,
31077 IX86_BUILTIN_DBPSADBW256,
31078 IX86_BUILTIN_DBPSADBW128,
31079 IX86_BUILTIN_CVTTPD2QQ256,
31080 IX86_BUILTIN_CVTTPD2QQ128,
31081 IX86_BUILTIN_CVTTPD2UQQ256,
31082 IX86_BUILTIN_CVTTPD2UQQ128,
31083 IX86_BUILTIN_CVTPD2QQ256,
31084 IX86_BUILTIN_CVTPD2QQ128,
31085 IX86_BUILTIN_CVTPD2UQQ256,
31086 IX86_BUILTIN_CVTPD2UQQ128,
31087 IX86_BUILTIN_CVTPD2UDQ256_MASK,
31088 IX86_BUILTIN_CVTPD2UDQ128_MASK,
31089 IX86_BUILTIN_CVTTPS2QQ256,
31090 IX86_BUILTIN_CVTTPS2QQ128,
31091 IX86_BUILTIN_CVTTPS2UQQ256,
31092 IX86_BUILTIN_CVTTPS2UQQ128,
31093 IX86_BUILTIN_CVTTPS2DQ256_MASK,
31094 IX86_BUILTIN_CVTTPS2DQ128_MASK,
31095 IX86_BUILTIN_CVTTPS2UDQ256,
31096 IX86_BUILTIN_CVTTPS2UDQ128,
31097 IX86_BUILTIN_CVTTPD2DQ256_MASK,
31098 IX86_BUILTIN_CVTTPD2DQ128_MASK,
31099 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
31100 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
31101 IX86_BUILTIN_CVTPD2DQ256_MASK,
31102 IX86_BUILTIN_CVTPD2DQ128_MASK,
31103 IX86_BUILTIN_CVTDQ2PD256_MASK,
31104 IX86_BUILTIN_CVTDQ2PD128_MASK,
31105 IX86_BUILTIN_CVTUDQ2PD256_MASK,
31106 IX86_BUILTIN_CVTUDQ2PD128_MASK,
31107 IX86_BUILTIN_CVTDQ2PS256_MASK,
31108 IX86_BUILTIN_CVTDQ2PS128_MASK,
31109 IX86_BUILTIN_CVTUDQ2PS256_MASK,
31110 IX86_BUILTIN_CVTUDQ2PS128_MASK,
31111 IX86_BUILTIN_CVTPS2PD256_MASK,
31112 IX86_BUILTIN_CVTPS2PD128_MASK,
31113 IX86_BUILTIN_PBROADCASTB256_MASK,
31114 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
31115 IX86_BUILTIN_PBROADCASTB128_MASK,
31116 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
31117 IX86_BUILTIN_PBROADCASTW256_MASK,
31118 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
31119 IX86_BUILTIN_PBROADCASTW128_MASK,
31120 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
31121 IX86_BUILTIN_PBROADCASTD256_MASK,
31122 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
31123 IX86_BUILTIN_PBROADCASTD128_MASK,
31124 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
31125 IX86_BUILTIN_PBROADCASTQ256_MASK,
31126 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
31127 IX86_BUILTIN_PBROADCASTQ128_MASK,
31128 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
31129 IX86_BUILTIN_BROADCASTSS256,
31130 IX86_BUILTIN_BROADCASTSS128,
31131 IX86_BUILTIN_BROADCASTSD256,
31132 IX86_BUILTIN_EXTRACTF64X2_256,
31133 IX86_BUILTIN_EXTRACTI64X2_256,
31134 IX86_BUILTIN_INSERTF32X4_256,
31135 IX86_BUILTIN_INSERTI32X4_256,
31136 IX86_BUILTIN_PMOVSXBW256_MASK,
31137 IX86_BUILTIN_PMOVSXBW128_MASK,
31138 IX86_BUILTIN_PMOVSXBD256_MASK,
31139 IX86_BUILTIN_PMOVSXBD128_MASK,
31140 IX86_BUILTIN_PMOVSXBQ256_MASK,
31141 IX86_BUILTIN_PMOVSXBQ128_MASK,
31142 IX86_BUILTIN_PMOVSXWD256_MASK,
31143 IX86_BUILTIN_PMOVSXWD128_MASK,
31144 IX86_BUILTIN_PMOVSXWQ256_MASK,
31145 IX86_BUILTIN_PMOVSXWQ128_MASK,
31146 IX86_BUILTIN_PMOVSXDQ256_MASK,
31147 IX86_BUILTIN_PMOVSXDQ128_MASK,
31148 IX86_BUILTIN_PMOVZXBW256_MASK,
31149 IX86_BUILTIN_PMOVZXBW128_MASK,
31150 IX86_BUILTIN_PMOVZXBD256_MASK,
31151 IX86_BUILTIN_PMOVZXBD128_MASK,
31152 IX86_BUILTIN_PMOVZXBQ256_MASK,
31153 IX86_BUILTIN_PMOVZXBQ128_MASK,
31154 IX86_BUILTIN_PMOVZXWD256_MASK,
31155 IX86_BUILTIN_PMOVZXWD128_MASK,
31156 IX86_BUILTIN_PMOVZXWQ256_MASK,
31157 IX86_BUILTIN_PMOVZXWQ128_MASK,
31158 IX86_BUILTIN_PMOVZXDQ256_MASK,
31159 IX86_BUILTIN_PMOVZXDQ128_MASK,
31160 IX86_BUILTIN_REDUCEPD256_MASK,
31161 IX86_BUILTIN_REDUCEPD128_MASK,
31162 IX86_BUILTIN_REDUCEPS256_MASK,
31163 IX86_BUILTIN_REDUCEPS128_MASK,
31164 IX86_BUILTIN_REDUCESD_MASK,
31165 IX86_BUILTIN_REDUCESS_MASK,
31166 IX86_BUILTIN_VPERMVARHI256_MASK,
31167 IX86_BUILTIN_VPERMVARHI128_MASK,
31168 IX86_BUILTIN_VPERMT2VARHI256,
31169 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
31170 IX86_BUILTIN_VPERMT2VARHI128,
31171 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
31172 IX86_BUILTIN_VPERMI2VARHI256,
31173 IX86_BUILTIN_VPERMI2VARHI128,
31174 IX86_BUILTIN_RCP14PD256,
31175 IX86_BUILTIN_RCP14PD128,
31176 IX86_BUILTIN_RCP14PS256,
31177 IX86_BUILTIN_RCP14PS128,
31178 IX86_BUILTIN_RSQRT14PD256_MASK,
31179 IX86_BUILTIN_RSQRT14PD128_MASK,
31180 IX86_BUILTIN_RSQRT14PS256_MASK,
31181 IX86_BUILTIN_RSQRT14PS128_MASK,
31182 IX86_BUILTIN_SQRTPD256_MASK,
31183 IX86_BUILTIN_SQRTPD128_MASK,
31184 IX86_BUILTIN_SQRTPS256_MASK,
31185 IX86_BUILTIN_SQRTPS128_MASK,
31186 IX86_BUILTIN_PADDB128_MASK,
31187 IX86_BUILTIN_PADDW128_MASK,
31188 IX86_BUILTIN_PADDD128_MASK,
31189 IX86_BUILTIN_PADDQ128_MASK,
31190 IX86_BUILTIN_PSUBB128_MASK,
31191 IX86_BUILTIN_PSUBW128_MASK,
31192 IX86_BUILTIN_PSUBD128_MASK,
31193 IX86_BUILTIN_PSUBQ128_MASK,
31194 IX86_BUILTIN_PADDSB128_MASK,
31195 IX86_BUILTIN_PADDSW128_MASK,
31196 IX86_BUILTIN_PSUBSB128_MASK,
31197 IX86_BUILTIN_PSUBSW128_MASK,
31198 IX86_BUILTIN_PADDUSB128_MASK,
31199 IX86_BUILTIN_PADDUSW128_MASK,
31200 IX86_BUILTIN_PSUBUSB128_MASK,
31201 IX86_BUILTIN_PSUBUSW128_MASK,
31202 IX86_BUILTIN_PADDB256_MASK,
31203 IX86_BUILTIN_PADDW256_MASK,
31204 IX86_BUILTIN_PADDD256_MASK,
31205 IX86_BUILTIN_PADDQ256_MASK,
31206 IX86_BUILTIN_PADDSB256_MASK,
31207 IX86_BUILTIN_PADDSW256_MASK,
31208 IX86_BUILTIN_PADDUSB256_MASK,
31209 IX86_BUILTIN_PADDUSW256_MASK,
31210 IX86_BUILTIN_PSUBB256_MASK,
31211 IX86_BUILTIN_PSUBW256_MASK,
31212 IX86_BUILTIN_PSUBD256_MASK,
31213 IX86_BUILTIN_PSUBQ256_MASK,
31214 IX86_BUILTIN_PSUBSB256_MASK,
31215 IX86_BUILTIN_PSUBSW256_MASK,
31216 IX86_BUILTIN_PSUBUSB256_MASK,
31217 IX86_BUILTIN_PSUBUSW256_MASK,
31218 IX86_BUILTIN_SHUF_F64x2_256,
31219 IX86_BUILTIN_SHUF_I64x2_256,
31220 IX86_BUILTIN_SHUF_I32x4_256,
31221 IX86_BUILTIN_SHUF_F32x4_256,
31222 IX86_BUILTIN_PMOVWB128,
31223 IX86_BUILTIN_PMOVWB256,
31224 IX86_BUILTIN_PMOVSWB128,
31225 IX86_BUILTIN_PMOVSWB256,
31226 IX86_BUILTIN_PMOVUSWB128,
31227 IX86_BUILTIN_PMOVUSWB256,
31228 IX86_BUILTIN_PMOVDB128,
31229 IX86_BUILTIN_PMOVDB256,
31230 IX86_BUILTIN_PMOVSDB128,
31231 IX86_BUILTIN_PMOVSDB256,
31232 IX86_BUILTIN_PMOVUSDB128,
31233 IX86_BUILTIN_PMOVUSDB256,
31234 IX86_BUILTIN_PMOVDW128,
31235 IX86_BUILTIN_PMOVDW256,
31236 IX86_BUILTIN_PMOVSDW128,
31237 IX86_BUILTIN_PMOVSDW256,
31238 IX86_BUILTIN_PMOVUSDW128,
31239 IX86_BUILTIN_PMOVUSDW256,
31240 IX86_BUILTIN_PMOVQB128,
31241 IX86_BUILTIN_PMOVQB256,
31242 IX86_BUILTIN_PMOVSQB128,
31243 IX86_BUILTIN_PMOVSQB256,
31244 IX86_BUILTIN_PMOVUSQB128,
31245 IX86_BUILTIN_PMOVUSQB256,
31246 IX86_BUILTIN_PMOVQW128,
31247 IX86_BUILTIN_PMOVQW256,
31248 IX86_BUILTIN_PMOVSQW128,
31249 IX86_BUILTIN_PMOVSQW256,
31250 IX86_BUILTIN_PMOVUSQW128,
31251 IX86_BUILTIN_PMOVUSQW256,
31252 IX86_BUILTIN_PMOVQD128,
31253 IX86_BUILTIN_PMOVQD256,
31254 IX86_BUILTIN_PMOVSQD128,
31255 IX86_BUILTIN_PMOVSQD256,
31256 IX86_BUILTIN_PMOVUSQD128,
31257 IX86_BUILTIN_PMOVUSQD256,
31258 IX86_BUILTIN_RANGEPD256,
31259 IX86_BUILTIN_RANGEPD128,
31260 IX86_BUILTIN_RANGEPS256,
31261 IX86_BUILTIN_RANGEPS128,
31262 IX86_BUILTIN_GETEXPPS256,
31263 IX86_BUILTIN_GETEXPPD256,
31264 IX86_BUILTIN_GETEXPPS128,
31265 IX86_BUILTIN_GETEXPPD128,
31266 IX86_BUILTIN_FIXUPIMMPD256_MASK,
31267 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
31268 IX86_BUILTIN_FIXUPIMMPS256_MASK,
31269 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
31270 IX86_BUILTIN_FIXUPIMMPD128_MASK,
31271 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
31272 IX86_BUILTIN_FIXUPIMMPS128_MASK,
31273 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
31274 IX86_BUILTIN_PABSQ256,
31275 IX86_BUILTIN_PABSQ128,
31276 IX86_BUILTIN_PABSD256_MASK,
31277 IX86_BUILTIN_PABSD128_MASK,
31278 IX86_BUILTIN_PMULHRSW256_MASK,
31279 IX86_BUILTIN_PMULHRSW128_MASK,
31280 IX86_BUILTIN_PMULHUW128_MASK,
31281 IX86_BUILTIN_PMULHUW256_MASK,
31282 IX86_BUILTIN_PMULHW256_MASK,
31283 IX86_BUILTIN_PMULHW128_MASK,
31284 IX86_BUILTIN_PMULLW256_MASK,
31285 IX86_BUILTIN_PMULLW128_MASK,
31286 IX86_BUILTIN_PMULLQ256,
31287 IX86_BUILTIN_PMULLQ128,
31288 IX86_BUILTIN_ANDPD256_MASK,
31289 IX86_BUILTIN_ANDPD128_MASK,
31290 IX86_BUILTIN_ANDPS256_MASK,
31291 IX86_BUILTIN_ANDPS128_MASK,
31292 IX86_BUILTIN_ANDNPD256_MASK,
31293 IX86_BUILTIN_ANDNPD128_MASK,
31294 IX86_BUILTIN_ANDNPS256_MASK,
31295 IX86_BUILTIN_ANDNPS128_MASK,
31296 IX86_BUILTIN_PSLLWI128_MASK,
31297 IX86_BUILTIN_PSLLDI128_MASK,
31298 IX86_BUILTIN_PSLLQI128_MASK,
31299 IX86_BUILTIN_PSLLW128_MASK,
31300 IX86_BUILTIN_PSLLD128_MASK,
31301 IX86_BUILTIN_PSLLQ128_MASK,
31302 IX86_BUILTIN_PSLLWI256_MASK,
31303 IX86_BUILTIN_PSLLW256_MASK,
31304 IX86_BUILTIN_PSLLDI256_MASK,
31305 IX86_BUILTIN_PSLLD256_MASK,
31306 IX86_BUILTIN_PSLLQI256_MASK,
31307 IX86_BUILTIN_PSLLQ256_MASK,
31308 IX86_BUILTIN_PSRADI128_MASK,
31309 IX86_BUILTIN_PSRAD128_MASK,
31310 IX86_BUILTIN_PSRADI256_MASK,
31311 IX86_BUILTIN_PSRAD256_MASK,
31312 IX86_BUILTIN_PSRAQI128_MASK,
31313 IX86_BUILTIN_PSRAQ128_MASK,
31314 IX86_BUILTIN_PSRAQI256_MASK,
31315 IX86_BUILTIN_PSRAQ256_MASK,
31316 IX86_BUILTIN_PANDD256,
31317 IX86_BUILTIN_PANDD128,
31318 IX86_BUILTIN_PSRLDI128_MASK,
31319 IX86_BUILTIN_PSRLD128_MASK,
31320 IX86_BUILTIN_PSRLDI256_MASK,
31321 IX86_BUILTIN_PSRLD256_MASK,
31322 IX86_BUILTIN_PSRLQI128_MASK,
31323 IX86_BUILTIN_PSRLQ128_MASK,
31324 IX86_BUILTIN_PSRLQI256_MASK,
31325 IX86_BUILTIN_PSRLQ256_MASK,
31326 IX86_BUILTIN_PANDQ256,
31327 IX86_BUILTIN_PANDQ128,
31328 IX86_BUILTIN_PANDND256,
31329 IX86_BUILTIN_PANDND128,
31330 IX86_BUILTIN_PANDNQ256,
31331 IX86_BUILTIN_PANDNQ128,
31332 IX86_BUILTIN_PORD256,
31333 IX86_BUILTIN_PORD128,
31334 IX86_BUILTIN_PORQ256,
31335 IX86_BUILTIN_PORQ128,
31336 IX86_BUILTIN_PXORD256,
31337 IX86_BUILTIN_PXORD128,
31338 IX86_BUILTIN_PXORQ256,
31339 IX86_BUILTIN_PXORQ128,
31340 IX86_BUILTIN_PACKSSWB256_MASK,
31341 IX86_BUILTIN_PACKSSWB128_MASK,
31342 IX86_BUILTIN_PACKUSWB256_MASK,
31343 IX86_BUILTIN_PACKUSWB128_MASK,
31344 IX86_BUILTIN_RNDSCALEPS256,
31345 IX86_BUILTIN_RNDSCALEPD256,
31346 IX86_BUILTIN_RNDSCALEPS128,
31347 IX86_BUILTIN_RNDSCALEPD128,
31348 IX86_BUILTIN_VTERNLOGQ256_MASK,
31349 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
31350 IX86_BUILTIN_VTERNLOGD256_MASK,
31351 IX86_BUILTIN_VTERNLOGD256_MASKZ,
31352 IX86_BUILTIN_VTERNLOGQ128_MASK,
31353 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
31354 IX86_BUILTIN_VTERNLOGD128_MASK,
31355 IX86_BUILTIN_VTERNLOGD128_MASKZ,
31356 IX86_BUILTIN_SCALEFPD256,
31357 IX86_BUILTIN_SCALEFPS256,
31358 IX86_BUILTIN_SCALEFPD128,
31359 IX86_BUILTIN_SCALEFPS128,
31360 IX86_BUILTIN_VFMADDPD256_MASK,
31361 IX86_BUILTIN_VFMADDPD256_MASK3,
31362 IX86_BUILTIN_VFMADDPD256_MASKZ,
31363 IX86_BUILTIN_VFMADDPD128_MASK,
31364 IX86_BUILTIN_VFMADDPD128_MASK3,
31365 IX86_BUILTIN_VFMADDPD128_MASKZ,
31366 IX86_BUILTIN_VFMADDPS256_MASK,
31367 IX86_BUILTIN_VFMADDPS256_MASK3,
31368 IX86_BUILTIN_VFMADDPS256_MASKZ,
31369 IX86_BUILTIN_VFMADDPS128_MASK,
31370 IX86_BUILTIN_VFMADDPS128_MASK3,
31371 IX86_BUILTIN_VFMADDPS128_MASKZ,
31372 IX86_BUILTIN_VFMSUBPD256_MASK3,
31373 IX86_BUILTIN_VFMSUBPD128_MASK3,
31374 IX86_BUILTIN_VFMSUBPS256_MASK3,
31375 IX86_BUILTIN_VFMSUBPS128_MASK3,
31376 IX86_BUILTIN_VFNMADDPD256_MASK,
31377 IX86_BUILTIN_VFNMADDPD128_MASK,
31378 IX86_BUILTIN_VFNMADDPS256_MASK,
31379 IX86_BUILTIN_VFNMADDPS128_MASK,
31380 IX86_BUILTIN_VFNMSUBPD256_MASK,
31381 IX86_BUILTIN_VFNMSUBPD256_MASK3,
31382 IX86_BUILTIN_VFNMSUBPD128_MASK,
31383 IX86_BUILTIN_VFNMSUBPD128_MASK3,
31384 IX86_BUILTIN_VFNMSUBPS256_MASK,
31385 IX86_BUILTIN_VFNMSUBPS256_MASK3,
31386 IX86_BUILTIN_VFNMSUBPS128_MASK,
31387 IX86_BUILTIN_VFNMSUBPS128_MASK3,
31388 IX86_BUILTIN_VFMADDSUBPD256_MASK,
31389 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
31390 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
31391 IX86_BUILTIN_VFMADDSUBPD128_MASK,
31392 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
31393 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
31394 IX86_BUILTIN_VFMADDSUBPS256_MASK,
31395 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
31396 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
31397 IX86_BUILTIN_VFMADDSUBPS128_MASK,
31398 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
31399 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
31400 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
31401 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
31402 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
31403 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
31404 IX86_BUILTIN_INSERTF64X2_256,
31405 IX86_BUILTIN_INSERTI64X2_256,
31406 IX86_BUILTIN_PSRAVV16HI,
31407 IX86_BUILTIN_PSRAVV8HI,
31408 IX86_BUILTIN_PMADDUBSW256_MASK,
31409 IX86_BUILTIN_PMADDUBSW128_MASK,
31410 IX86_BUILTIN_PMADDWD256_MASK,
31411 IX86_BUILTIN_PMADDWD128_MASK,
31412 IX86_BUILTIN_PSRLVV16HI,
31413 IX86_BUILTIN_PSRLVV8HI,
31414 IX86_BUILTIN_CVTPS2DQ256_MASK,
31415 IX86_BUILTIN_CVTPS2DQ128_MASK,
31416 IX86_BUILTIN_CVTPS2UDQ256,
31417 IX86_BUILTIN_CVTPS2UDQ128,
31418 IX86_BUILTIN_CVTPS2QQ256,
31419 IX86_BUILTIN_CVTPS2QQ128,
31420 IX86_BUILTIN_CVTPS2UQQ256,
31421 IX86_BUILTIN_CVTPS2UQQ128,
31422 IX86_BUILTIN_GETMANTPS256,
31423 IX86_BUILTIN_GETMANTPS128,
31424 IX86_BUILTIN_GETMANTPD256,
31425 IX86_BUILTIN_GETMANTPD128,
31426 IX86_BUILTIN_MOVDDUP256_MASK,
31427 IX86_BUILTIN_MOVDDUP128_MASK,
31428 IX86_BUILTIN_MOVSHDUP256_MASK,
31429 IX86_BUILTIN_MOVSHDUP128_MASK,
31430 IX86_BUILTIN_MOVSLDUP256_MASK,
31431 IX86_BUILTIN_MOVSLDUP128_MASK,
31432 IX86_BUILTIN_CVTQQ2PS256,
31433 IX86_BUILTIN_CVTQQ2PS128,
31434 IX86_BUILTIN_CVTUQQ2PS256,
31435 IX86_BUILTIN_CVTUQQ2PS128,
31436 IX86_BUILTIN_CVTQQ2PD256,
31437 IX86_BUILTIN_CVTQQ2PD128,
31438 IX86_BUILTIN_CVTUQQ2PD256,
31439 IX86_BUILTIN_CVTUQQ2PD128,
31440 IX86_BUILTIN_VPERMT2VARQ256,
31441 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
31442 IX86_BUILTIN_VPERMT2VARD256,
31443 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
31444 IX86_BUILTIN_VPERMI2VARQ256,
31445 IX86_BUILTIN_VPERMI2VARD256,
31446 IX86_BUILTIN_VPERMT2VARPD256,
31447 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
31448 IX86_BUILTIN_VPERMT2VARPS256,
31449 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
31450 IX86_BUILTIN_VPERMI2VARPD256,
31451 IX86_BUILTIN_VPERMI2VARPS256,
31452 IX86_BUILTIN_VPERMT2VARQ128,
31453 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
31454 IX86_BUILTIN_VPERMT2VARD128,
31455 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
31456 IX86_BUILTIN_VPERMI2VARQ128,
31457 IX86_BUILTIN_VPERMI2VARD128,
31458 IX86_BUILTIN_VPERMT2VARPD128,
31459 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
31460 IX86_BUILTIN_VPERMT2VARPS128,
31461 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
31462 IX86_BUILTIN_VPERMI2VARPD128,
31463 IX86_BUILTIN_VPERMI2VARPS128,
31464 IX86_BUILTIN_PSHUFB256_MASK,
31465 IX86_BUILTIN_PSHUFB128_MASK,
31466 IX86_BUILTIN_PSHUFHW256_MASK,
31467 IX86_BUILTIN_PSHUFHW128_MASK,
31468 IX86_BUILTIN_PSHUFLW256_MASK,
31469 IX86_BUILTIN_PSHUFLW128_MASK,
31470 IX86_BUILTIN_PSHUFD256_MASK,
31471 IX86_BUILTIN_PSHUFD128_MASK,
31472 IX86_BUILTIN_SHUFPD256_MASK,
31473 IX86_BUILTIN_SHUFPD128_MASK,
31474 IX86_BUILTIN_SHUFPS256_MASK,
31475 IX86_BUILTIN_SHUFPS128_MASK,
31476 IX86_BUILTIN_PROLVQ256,
31477 IX86_BUILTIN_PROLVQ128,
31478 IX86_BUILTIN_PROLQ256,
31479 IX86_BUILTIN_PROLQ128,
31480 IX86_BUILTIN_PRORVQ256,
31481 IX86_BUILTIN_PRORVQ128,
31482 IX86_BUILTIN_PRORQ256,
31483 IX86_BUILTIN_PRORQ128,
31484 IX86_BUILTIN_PSRAVQ128,
31485 IX86_BUILTIN_PSRAVQ256,
31486 IX86_BUILTIN_PSLLVV4DI_MASK,
31487 IX86_BUILTIN_PSLLVV2DI_MASK,
31488 IX86_BUILTIN_PSLLVV8SI_MASK,
31489 IX86_BUILTIN_PSLLVV4SI_MASK,
31490 IX86_BUILTIN_PSRAVV8SI_MASK,
31491 IX86_BUILTIN_PSRAVV4SI_MASK,
31492 IX86_BUILTIN_PSRLVV4DI_MASK,
31493 IX86_BUILTIN_PSRLVV2DI_MASK,
31494 IX86_BUILTIN_PSRLVV8SI_MASK,
31495 IX86_BUILTIN_PSRLVV4SI_MASK,
31496 IX86_BUILTIN_PSRAWI256_MASK,
31497 IX86_BUILTIN_PSRAW256_MASK,
31498 IX86_BUILTIN_PSRAWI128_MASK,
31499 IX86_BUILTIN_PSRAW128_MASK,
31500 IX86_BUILTIN_PSRLWI256_MASK,
31501 IX86_BUILTIN_PSRLW256_MASK,
31502 IX86_BUILTIN_PSRLWI128_MASK,
31503 IX86_BUILTIN_PSRLW128_MASK,
31504 IX86_BUILTIN_PRORVD256,
31505 IX86_BUILTIN_PROLVD256,
31506 IX86_BUILTIN_PRORD256,
31507 IX86_BUILTIN_PROLD256,
31508 IX86_BUILTIN_PRORVD128,
31509 IX86_BUILTIN_PROLVD128,
31510 IX86_BUILTIN_PRORD128,
31511 IX86_BUILTIN_PROLD128,
31512 IX86_BUILTIN_FPCLASSPD256,
31513 IX86_BUILTIN_FPCLASSPD128,
31514 IX86_BUILTIN_FPCLASSSD,
31515 IX86_BUILTIN_FPCLASSPS256,
31516 IX86_BUILTIN_FPCLASSPS128,
31517 IX86_BUILTIN_FPCLASSSS,
31518 IX86_BUILTIN_CVTB2MASK128,
31519 IX86_BUILTIN_CVTB2MASK256,
31520 IX86_BUILTIN_CVTW2MASK128,
31521 IX86_BUILTIN_CVTW2MASK256,
31522 IX86_BUILTIN_CVTD2MASK128,
31523 IX86_BUILTIN_CVTD2MASK256,
31524 IX86_BUILTIN_CVTQ2MASK128,
31525 IX86_BUILTIN_CVTQ2MASK256,
31526 IX86_BUILTIN_CVTMASK2B128,
31527 IX86_BUILTIN_CVTMASK2B256,
31528 IX86_BUILTIN_CVTMASK2W128,
31529 IX86_BUILTIN_CVTMASK2W256,
31530 IX86_BUILTIN_CVTMASK2D128,
31531 IX86_BUILTIN_CVTMASK2D256,
31532 IX86_BUILTIN_CVTMASK2Q128,
31533 IX86_BUILTIN_CVTMASK2Q256,
31534 IX86_BUILTIN_PCMPEQB128_MASK,
31535 IX86_BUILTIN_PCMPEQB256_MASK,
31536 IX86_BUILTIN_PCMPEQW128_MASK,
31537 IX86_BUILTIN_PCMPEQW256_MASK,
31538 IX86_BUILTIN_PCMPEQD128_MASK,
31539 IX86_BUILTIN_PCMPEQD256_MASK,
31540 IX86_BUILTIN_PCMPEQQ128_MASK,
31541 IX86_BUILTIN_PCMPEQQ256_MASK,
31542 IX86_BUILTIN_PCMPGTB128_MASK,
31543 IX86_BUILTIN_PCMPGTB256_MASK,
31544 IX86_BUILTIN_PCMPGTW128_MASK,
31545 IX86_BUILTIN_PCMPGTW256_MASK,
31546 IX86_BUILTIN_PCMPGTD128_MASK,
31547 IX86_BUILTIN_PCMPGTD256_MASK,
31548 IX86_BUILTIN_PCMPGTQ128_MASK,
31549 IX86_BUILTIN_PCMPGTQ256_MASK,
31550 IX86_BUILTIN_PTESTMB128,
31551 IX86_BUILTIN_PTESTMB256,
31552 IX86_BUILTIN_PTESTMW128,
31553 IX86_BUILTIN_PTESTMW256,
31554 IX86_BUILTIN_PTESTMD128,
31555 IX86_BUILTIN_PTESTMD256,
31556 IX86_BUILTIN_PTESTMQ128,
31557 IX86_BUILTIN_PTESTMQ256,
31558 IX86_BUILTIN_PTESTNMB128,
31559 IX86_BUILTIN_PTESTNMB256,
31560 IX86_BUILTIN_PTESTNMW128,
31561 IX86_BUILTIN_PTESTNMW256,
31562 IX86_BUILTIN_PTESTNMD128,
31563 IX86_BUILTIN_PTESTNMD256,
31564 IX86_BUILTIN_PTESTNMQ128,
31565 IX86_BUILTIN_PTESTNMQ256,
31566 IX86_BUILTIN_PBROADCASTMB128,
31567 IX86_BUILTIN_PBROADCASTMB256,
31568 IX86_BUILTIN_PBROADCASTMW128,
31569 IX86_BUILTIN_PBROADCASTMW256,
31570 IX86_BUILTIN_COMPRESSPD256,
31571 IX86_BUILTIN_COMPRESSPD128,
31572 IX86_BUILTIN_COMPRESSPS256,
31573 IX86_BUILTIN_COMPRESSPS128,
31574 IX86_BUILTIN_PCOMPRESSQ256,
31575 IX86_BUILTIN_PCOMPRESSQ128,
31576 IX86_BUILTIN_PCOMPRESSD256,
31577 IX86_BUILTIN_PCOMPRESSD128,
31578 IX86_BUILTIN_EXPANDPD256,
31579 IX86_BUILTIN_EXPANDPD128,
31580 IX86_BUILTIN_EXPANDPS256,
31581 IX86_BUILTIN_EXPANDPS128,
31582 IX86_BUILTIN_PEXPANDQ256,
31583 IX86_BUILTIN_PEXPANDQ128,
31584 IX86_BUILTIN_PEXPANDD256,
31585 IX86_BUILTIN_PEXPANDD128,
31586 IX86_BUILTIN_EXPANDPD256Z,
31587 IX86_BUILTIN_EXPANDPD128Z,
31588 IX86_BUILTIN_EXPANDPS256Z,
31589 IX86_BUILTIN_EXPANDPS128Z,
31590 IX86_BUILTIN_PEXPANDQ256Z,
31591 IX86_BUILTIN_PEXPANDQ128Z,
31592 IX86_BUILTIN_PEXPANDD256Z,
31593 IX86_BUILTIN_PEXPANDD128Z,
31594 IX86_BUILTIN_PMAXSD256_MASK,
31595 IX86_BUILTIN_PMINSD256_MASK,
31596 IX86_BUILTIN_PMAXUD256_MASK,
31597 IX86_BUILTIN_PMINUD256_MASK,
31598 IX86_BUILTIN_PMAXSD128_MASK,
31599 IX86_BUILTIN_PMINSD128_MASK,
31600 IX86_BUILTIN_PMAXUD128_MASK,
31601 IX86_BUILTIN_PMINUD128_MASK,
31602 IX86_BUILTIN_PMAXSQ256_MASK,
31603 IX86_BUILTIN_PMINSQ256_MASK,
31604 IX86_BUILTIN_PMAXUQ256_MASK,
31605 IX86_BUILTIN_PMINUQ256_MASK,
31606 IX86_BUILTIN_PMAXSQ128_MASK,
31607 IX86_BUILTIN_PMINSQ128_MASK,
31608 IX86_BUILTIN_PMAXUQ128_MASK,
31609 IX86_BUILTIN_PMINUQ128_MASK,
31610 IX86_BUILTIN_PMINSB256_MASK,
31611 IX86_BUILTIN_PMINUB256_MASK,
31612 IX86_BUILTIN_PMAXSB256_MASK,
31613 IX86_BUILTIN_PMAXUB256_MASK,
31614 IX86_BUILTIN_PMINSB128_MASK,
31615 IX86_BUILTIN_PMINUB128_MASK,
31616 IX86_BUILTIN_PMAXSB128_MASK,
31617 IX86_BUILTIN_PMAXUB128_MASK,
31618 IX86_BUILTIN_PMINSW256_MASK,
31619 IX86_BUILTIN_PMINUW256_MASK,
31620 IX86_BUILTIN_PMAXSW256_MASK,
31621 IX86_BUILTIN_PMAXUW256_MASK,
31622 IX86_BUILTIN_PMINSW128_MASK,
31623 IX86_BUILTIN_PMINUW128_MASK,
31624 IX86_BUILTIN_PMAXSW128_MASK,
31625 IX86_BUILTIN_PMAXUW128_MASK,
31626 IX86_BUILTIN_VPCONFLICTQ256,
31627 IX86_BUILTIN_VPCONFLICTD256,
31628 IX86_BUILTIN_VPCLZCNTQ256,
31629 IX86_BUILTIN_VPCLZCNTD256,
31630 IX86_BUILTIN_UNPCKHPD256_MASK,
31631 IX86_BUILTIN_UNPCKHPD128_MASK,
31632 IX86_BUILTIN_UNPCKHPS256_MASK,
31633 IX86_BUILTIN_UNPCKHPS128_MASK,
31634 IX86_BUILTIN_UNPCKLPD256_MASK,
31635 IX86_BUILTIN_UNPCKLPD128_MASK,
31636 IX86_BUILTIN_UNPCKLPS256_MASK,
31637 IX86_BUILTIN_VPCONFLICTQ128,
31638 IX86_BUILTIN_VPCONFLICTD128,
31639 IX86_BUILTIN_VPCLZCNTQ128,
31640 IX86_BUILTIN_VPCLZCNTD128,
31641 IX86_BUILTIN_UNPCKLPS128_MASK,
31642 IX86_BUILTIN_ALIGND256,
31643 IX86_BUILTIN_ALIGNQ256,
31644 IX86_BUILTIN_ALIGND128,
31645 IX86_BUILTIN_ALIGNQ128,
31646 IX86_BUILTIN_CVTPS2PH256_MASK,
31647 IX86_BUILTIN_CVTPS2PH_MASK,
31648 IX86_BUILTIN_CVTPH2PS_MASK,
31649 IX86_BUILTIN_CVTPH2PS256_MASK,
31650 IX86_BUILTIN_PUNPCKHDQ128_MASK,
31651 IX86_BUILTIN_PUNPCKHDQ256_MASK,
31652 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
31653 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
31654 IX86_BUILTIN_PUNPCKLDQ128_MASK,
31655 IX86_BUILTIN_PUNPCKLDQ256_MASK,
31656 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
31657 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
31658 IX86_BUILTIN_PUNPCKHBW128_MASK,
31659 IX86_BUILTIN_PUNPCKHBW256_MASK,
31660 IX86_BUILTIN_PUNPCKHWD128_MASK,
31661 IX86_BUILTIN_PUNPCKHWD256_MASK,
31662 IX86_BUILTIN_PUNPCKLBW128_MASK,
31663 IX86_BUILTIN_PUNPCKLBW256_MASK,
31664 IX86_BUILTIN_PUNPCKLWD128_MASK,
31665 IX86_BUILTIN_PUNPCKLWD256_MASK,
31666 IX86_BUILTIN_PSLLVV16HI,
31667 IX86_BUILTIN_PSLLVV8HI,
31668 IX86_BUILTIN_PACKSSDW256_MASK,
31669 IX86_BUILTIN_PACKSSDW128_MASK,
31670 IX86_BUILTIN_PACKUSDW256_MASK,
31671 IX86_BUILTIN_PACKUSDW128_MASK,
31672 IX86_BUILTIN_PAVGB256_MASK,
31673 IX86_BUILTIN_PAVGW256_MASK,
31674 IX86_BUILTIN_PAVGB128_MASK,
31675 IX86_BUILTIN_PAVGW128_MASK,
31676 IX86_BUILTIN_VPERMVARSF256_MASK,
31677 IX86_BUILTIN_VPERMVARDF256_MASK,
31678 IX86_BUILTIN_VPERMDF256_MASK,
31679 IX86_BUILTIN_PABSB256_MASK,
31680 IX86_BUILTIN_PABSB128_MASK,
31681 IX86_BUILTIN_PABSW256_MASK,
31682 IX86_BUILTIN_PABSW128_MASK,
31683 IX86_BUILTIN_VPERMILVARPD_MASK,
31684 IX86_BUILTIN_VPERMILVARPS_MASK,
31685 IX86_BUILTIN_VPERMILVARPD256_MASK,
31686 IX86_BUILTIN_VPERMILVARPS256_MASK,
31687 IX86_BUILTIN_VPERMILPD_MASK,
31688 IX86_BUILTIN_VPERMILPS_MASK,
31689 IX86_BUILTIN_VPERMILPD256_MASK,
31690 IX86_BUILTIN_VPERMILPS256_MASK,
31691 IX86_BUILTIN_BLENDMQ256,
31692 IX86_BUILTIN_BLENDMD256,
31693 IX86_BUILTIN_BLENDMPD256,
31694 IX86_BUILTIN_BLENDMPS256,
31695 IX86_BUILTIN_BLENDMQ128,
31696 IX86_BUILTIN_BLENDMD128,
31697 IX86_BUILTIN_BLENDMPD128,
31698 IX86_BUILTIN_BLENDMPS128,
31699 IX86_BUILTIN_BLENDMW256,
31700 IX86_BUILTIN_BLENDMB256,
31701 IX86_BUILTIN_BLENDMW128,
31702 IX86_BUILTIN_BLENDMB128,
31703 IX86_BUILTIN_PMULLD256_MASK,
31704 IX86_BUILTIN_PMULLD128_MASK,
31705 IX86_BUILTIN_PMULUDQ256_MASK,
31706 IX86_BUILTIN_PMULDQ256_MASK,
31707 IX86_BUILTIN_PMULDQ128_MASK,
31708 IX86_BUILTIN_PMULUDQ128_MASK,
31709 IX86_BUILTIN_CVTPD2PS256_MASK,
31710 IX86_BUILTIN_CVTPD2PS_MASK,
31711 IX86_BUILTIN_VPERMVARSI256_MASK,
31712 IX86_BUILTIN_VPERMVARDI256_MASK,
31713 IX86_BUILTIN_VPERMDI256_MASK,
31714 IX86_BUILTIN_CMPQ256,
31715 IX86_BUILTIN_CMPD256,
31716 IX86_BUILTIN_UCMPQ256,
31717 IX86_BUILTIN_UCMPD256,
31718 IX86_BUILTIN_CMPB256,
31719 IX86_BUILTIN_CMPW256,
31720 IX86_BUILTIN_UCMPB256,
31721 IX86_BUILTIN_UCMPW256,
31722 IX86_BUILTIN_CMPPD256_MASK,
31723 IX86_BUILTIN_CMPPS256_MASK,
31724 IX86_BUILTIN_CMPQ128,
31725 IX86_BUILTIN_CMPD128,
31726 IX86_BUILTIN_UCMPQ128,
31727 IX86_BUILTIN_UCMPD128,
31728 IX86_BUILTIN_CMPB128,
31729 IX86_BUILTIN_CMPW128,
31730 IX86_BUILTIN_UCMPB128,
31731 IX86_BUILTIN_UCMPW128,
31732 IX86_BUILTIN_CMPPD128_MASK,
31733 IX86_BUILTIN_CMPPS128_MASK,
31734
31735 IX86_BUILTIN_GATHER3SIV8SF,
31736 IX86_BUILTIN_GATHER3SIV4SF,
31737 IX86_BUILTIN_GATHER3SIV4DF,
31738 IX86_BUILTIN_GATHER3SIV2DF,
31739 IX86_BUILTIN_GATHER3DIV8SF,
31740 IX86_BUILTIN_GATHER3DIV4SF,
31741 IX86_BUILTIN_GATHER3DIV4DF,
31742 IX86_BUILTIN_GATHER3DIV2DF,
31743 IX86_BUILTIN_GATHER3SIV8SI,
31744 IX86_BUILTIN_GATHER3SIV4SI,
31745 IX86_BUILTIN_GATHER3SIV4DI,
31746 IX86_BUILTIN_GATHER3SIV2DI,
31747 IX86_BUILTIN_GATHER3DIV8SI,
31748 IX86_BUILTIN_GATHER3DIV4SI,
31749 IX86_BUILTIN_GATHER3DIV4DI,
31750 IX86_BUILTIN_GATHER3DIV2DI,
31751 IX86_BUILTIN_SCATTERSIV8SF,
31752 IX86_BUILTIN_SCATTERSIV4SF,
31753 IX86_BUILTIN_SCATTERSIV4DF,
31754 IX86_BUILTIN_SCATTERSIV2DF,
31755 IX86_BUILTIN_SCATTERDIV8SF,
31756 IX86_BUILTIN_SCATTERDIV4SF,
31757 IX86_BUILTIN_SCATTERDIV4DF,
31758 IX86_BUILTIN_SCATTERDIV2DF,
31759 IX86_BUILTIN_SCATTERSIV8SI,
31760 IX86_BUILTIN_SCATTERSIV4SI,
31761 IX86_BUILTIN_SCATTERSIV4DI,
31762 IX86_BUILTIN_SCATTERSIV2DI,
31763 IX86_BUILTIN_SCATTERDIV8SI,
31764 IX86_BUILTIN_SCATTERDIV4SI,
31765 IX86_BUILTIN_SCATTERDIV4DI,
31766 IX86_BUILTIN_SCATTERDIV2DI,
31767
31768 /* AVX512DQ. */
31769 IX86_BUILTIN_RANGESD128,
31770 IX86_BUILTIN_RANGESS128,
31771 IX86_BUILTIN_KUNPCKWD,
31772 IX86_BUILTIN_KUNPCKDQ,
31773 IX86_BUILTIN_BROADCASTF32x2_512,
31774 IX86_BUILTIN_BROADCASTI32x2_512,
31775 IX86_BUILTIN_BROADCASTF64X2_512,
31776 IX86_BUILTIN_BROADCASTI64X2_512,
31777 IX86_BUILTIN_BROADCASTF32X8_512,
31778 IX86_BUILTIN_BROADCASTI32X8_512,
31779 IX86_BUILTIN_EXTRACTF64X2_512,
31780 IX86_BUILTIN_EXTRACTF32X8,
31781 IX86_BUILTIN_EXTRACTI64X2_512,
31782 IX86_BUILTIN_EXTRACTI32X8,
31783 IX86_BUILTIN_REDUCEPD512_MASK,
31784 IX86_BUILTIN_REDUCEPS512_MASK,
31785 IX86_BUILTIN_PMULLQ512,
31786 IX86_BUILTIN_XORPD512,
31787 IX86_BUILTIN_XORPS512,
31788 IX86_BUILTIN_ORPD512,
31789 IX86_BUILTIN_ORPS512,
31790 IX86_BUILTIN_ANDPD512,
31791 IX86_BUILTIN_ANDPS512,
31792 IX86_BUILTIN_ANDNPD512,
31793 IX86_BUILTIN_ANDNPS512,
31794 IX86_BUILTIN_INSERTF32X8,
31795 IX86_BUILTIN_INSERTI32X8,
31796 IX86_BUILTIN_INSERTF64X2_512,
31797 IX86_BUILTIN_INSERTI64X2_512,
31798 IX86_BUILTIN_FPCLASSPD512,
31799 IX86_BUILTIN_FPCLASSPS512,
31800 IX86_BUILTIN_CVTD2MASK512,
31801 IX86_BUILTIN_CVTQ2MASK512,
31802 IX86_BUILTIN_CVTMASK2D512,
31803 IX86_BUILTIN_CVTMASK2Q512,
31804 IX86_BUILTIN_CVTPD2QQ512,
31805 IX86_BUILTIN_CVTPS2QQ512,
31806 IX86_BUILTIN_CVTPD2UQQ512,
31807 IX86_BUILTIN_CVTPS2UQQ512,
31808 IX86_BUILTIN_CVTQQ2PS512,
31809 IX86_BUILTIN_CVTUQQ2PS512,
31810 IX86_BUILTIN_CVTQQ2PD512,
31811 IX86_BUILTIN_CVTUQQ2PD512,
31812 IX86_BUILTIN_CVTTPS2QQ512,
31813 IX86_BUILTIN_CVTTPS2UQQ512,
31814 IX86_BUILTIN_CVTTPD2QQ512,
31815 IX86_BUILTIN_CVTTPD2UQQ512,
31816 IX86_BUILTIN_RANGEPS512,
31817 IX86_BUILTIN_RANGEPD512,
31818
31819 /* AVX512BW. */
31820 IX86_BUILTIN_PACKUSDW512,
31821 IX86_BUILTIN_PACKSSDW512,
31822 IX86_BUILTIN_LOADDQUHI512_MASK,
31823 IX86_BUILTIN_LOADDQUQI512_MASK,
31824 IX86_BUILTIN_PSLLDQ512,
31825 IX86_BUILTIN_PSRLDQ512,
31826 IX86_BUILTIN_STOREDQUHI512_MASK,
31827 IX86_BUILTIN_STOREDQUQI512_MASK,
31828 IX86_BUILTIN_PALIGNR512,
31829 IX86_BUILTIN_PALIGNR512_MASK,
31830 IX86_BUILTIN_MOVDQUHI512_MASK,
31831 IX86_BUILTIN_MOVDQUQI512_MASK,
31832 IX86_BUILTIN_PSADBW512,
31833 IX86_BUILTIN_DBPSADBW512,
31834 IX86_BUILTIN_PBROADCASTB512,
31835 IX86_BUILTIN_PBROADCASTB512_GPR,
31836 IX86_BUILTIN_PBROADCASTW512,
31837 IX86_BUILTIN_PBROADCASTW512_GPR,
31838 IX86_BUILTIN_PMOVSXBW512_MASK,
31839 IX86_BUILTIN_PMOVZXBW512_MASK,
31840 IX86_BUILTIN_VPERMVARHI512_MASK,
31841 IX86_BUILTIN_VPERMT2VARHI512,
31842 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
31843 IX86_BUILTIN_VPERMI2VARHI512,
31844 IX86_BUILTIN_PAVGB512,
31845 IX86_BUILTIN_PAVGW512,
31846 IX86_BUILTIN_PADDB512,
31847 IX86_BUILTIN_PSUBB512,
31848 IX86_BUILTIN_PSUBSB512,
31849 IX86_BUILTIN_PADDSB512,
31850 IX86_BUILTIN_PSUBUSB512,
31851 IX86_BUILTIN_PADDUSB512,
31852 IX86_BUILTIN_PSUBW512,
31853 IX86_BUILTIN_PADDW512,
31854 IX86_BUILTIN_PSUBSW512,
31855 IX86_BUILTIN_PADDSW512,
31856 IX86_BUILTIN_PSUBUSW512,
31857 IX86_BUILTIN_PADDUSW512,
31858 IX86_BUILTIN_PMAXUW512,
31859 IX86_BUILTIN_PMAXSW512,
31860 IX86_BUILTIN_PMINUW512,
31861 IX86_BUILTIN_PMINSW512,
31862 IX86_BUILTIN_PMAXUB512,
31863 IX86_BUILTIN_PMAXSB512,
31864 IX86_BUILTIN_PMINUB512,
31865 IX86_BUILTIN_PMINSB512,
31866 IX86_BUILTIN_PMOVWB512,
31867 IX86_BUILTIN_PMOVSWB512,
31868 IX86_BUILTIN_PMOVUSWB512,
31869 IX86_BUILTIN_PMULHRSW512_MASK,
31870 IX86_BUILTIN_PMULHUW512_MASK,
31871 IX86_BUILTIN_PMULHW512_MASK,
31872 IX86_BUILTIN_PMULLW512_MASK,
31873 IX86_BUILTIN_PSLLWI512_MASK,
31874 IX86_BUILTIN_PSLLW512_MASK,
31875 IX86_BUILTIN_PACKSSWB512,
31876 IX86_BUILTIN_PACKUSWB512,
31877 IX86_BUILTIN_PSRAVV32HI,
31878 IX86_BUILTIN_PMADDUBSW512_MASK,
31879 IX86_BUILTIN_PMADDWD512_MASK,
31880 IX86_BUILTIN_PSRLVV32HI,
31881 IX86_BUILTIN_PUNPCKHBW512,
31882 IX86_BUILTIN_PUNPCKHWD512,
31883 IX86_BUILTIN_PUNPCKLBW512,
31884 IX86_BUILTIN_PUNPCKLWD512,
31885 IX86_BUILTIN_PSHUFB512,
31886 IX86_BUILTIN_PSHUFHW512,
31887 IX86_BUILTIN_PSHUFLW512,
31888 IX86_BUILTIN_PSRAWI512,
31889 IX86_BUILTIN_PSRAW512,
31890 IX86_BUILTIN_PSRLWI512,
31891 IX86_BUILTIN_PSRLW512,
31892 IX86_BUILTIN_CVTB2MASK512,
31893 IX86_BUILTIN_CVTW2MASK512,
31894 IX86_BUILTIN_CVTMASK2B512,
31895 IX86_BUILTIN_CVTMASK2W512,
31896 IX86_BUILTIN_PCMPEQB512_MASK,
31897 IX86_BUILTIN_PCMPEQW512_MASK,
31898 IX86_BUILTIN_PCMPGTB512_MASK,
31899 IX86_BUILTIN_PCMPGTW512_MASK,
31900 IX86_BUILTIN_PTESTMB512,
31901 IX86_BUILTIN_PTESTMW512,
31902 IX86_BUILTIN_PTESTNMB512,
31903 IX86_BUILTIN_PTESTNMW512,
31904 IX86_BUILTIN_PSLLVV32HI,
31905 IX86_BUILTIN_PABSB512,
31906 IX86_BUILTIN_PABSW512,
31907 IX86_BUILTIN_BLENDMW512,
31908 IX86_BUILTIN_BLENDMB512,
31909 IX86_BUILTIN_CMPB512,
31910 IX86_BUILTIN_CMPW512,
31911 IX86_BUILTIN_UCMPB512,
31912 IX86_BUILTIN_UCMPW512,
31913
31914 /* Alternate 4- and 8-element gather/scatter for the vectorizer
31915 where all operands are 32-byte or 64-byte wide, respectively. */
31916 IX86_BUILTIN_GATHERALTSIV4DF,
31917 IX86_BUILTIN_GATHERALTDIV8SF,
31918 IX86_BUILTIN_GATHERALTSIV4DI,
31919 IX86_BUILTIN_GATHERALTDIV8SI,
31920 IX86_BUILTIN_GATHER3ALTDIV16SF,
31921 IX86_BUILTIN_GATHER3ALTDIV16SI,
31922 IX86_BUILTIN_GATHER3ALTSIV4DF,
31923 IX86_BUILTIN_GATHER3ALTDIV8SF,
31924 IX86_BUILTIN_GATHER3ALTSIV4DI,
31925 IX86_BUILTIN_GATHER3ALTDIV8SI,
31926 IX86_BUILTIN_GATHER3ALTSIV8DF,
31927 IX86_BUILTIN_GATHER3ALTSIV8DI,
31928 IX86_BUILTIN_GATHER3DIV16SF,
31929 IX86_BUILTIN_GATHER3DIV16SI,
31930 IX86_BUILTIN_GATHER3DIV8DF,
31931 IX86_BUILTIN_GATHER3DIV8DI,
31932 IX86_BUILTIN_GATHER3SIV16SF,
31933 IX86_BUILTIN_GATHER3SIV16SI,
31934 IX86_BUILTIN_GATHER3SIV8DF,
31935 IX86_BUILTIN_GATHER3SIV8DI,
31936 IX86_BUILTIN_SCATTERALTSIV8DF,
31937 IX86_BUILTIN_SCATTERALTDIV16SF,
31938 IX86_BUILTIN_SCATTERALTSIV8DI,
31939 IX86_BUILTIN_SCATTERALTDIV16SI,
31940 IX86_BUILTIN_SCATTERDIV16SF,
31941 IX86_BUILTIN_SCATTERDIV16SI,
31942 IX86_BUILTIN_SCATTERDIV8DF,
31943 IX86_BUILTIN_SCATTERDIV8DI,
31944 IX86_BUILTIN_SCATTERSIV16SF,
31945 IX86_BUILTIN_SCATTERSIV16SI,
31946 IX86_BUILTIN_SCATTERSIV8DF,
31947 IX86_BUILTIN_SCATTERSIV8DI,
31948
31949 /* AVX512PF */
31950 IX86_BUILTIN_GATHERPFQPD,
31951 IX86_BUILTIN_GATHERPFDPS,
31952 IX86_BUILTIN_GATHERPFDPD,
31953 IX86_BUILTIN_GATHERPFQPS,
31954 IX86_BUILTIN_SCATTERPFDPD,
31955 IX86_BUILTIN_SCATTERPFDPS,
31956 IX86_BUILTIN_SCATTERPFQPD,
31957 IX86_BUILTIN_SCATTERPFQPS,
31958
31959 /* AVX512ER */
31960 IX86_BUILTIN_EXP2PD_MASK,
31961 IX86_BUILTIN_EXP2PS_MASK,
31962 IX86_BUILTIN_EXP2PS,
31963 IX86_BUILTIN_RCP28PD,
31964 IX86_BUILTIN_RCP28PS,
31965 IX86_BUILTIN_RCP28SD,
31966 IX86_BUILTIN_RCP28SS,
31967 IX86_BUILTIN_RSQRT28PD,
31968 IX86_BUILTIN_RSQRT28PS,
31969 IX86_BUILTIN_RSQRT28SD,
31970 IX86_BUILTIN_RSQRT28SS,
31971
31972 /* AVX512IFMA */
31973 IX86_BUILTIN_VPMADD52LUQ512,
31974 IX86_BUILTIN_VPMADD52HUQ512,
31975 IX86_BUILTIN_VPMADD52LUQ256,
31976 IX86_BUILTIN_VPMADD52HUQ256,
31977 IX86_BUILTIN_VPMADD52LUQ128,
31978 IX86_BUILTIN_VPMADD52HUQ128,
31979 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
31980 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
31981 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
31982 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
31983 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
31984 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
31985
31986 /* AVX512VBMI */
31987 IX86_BUILTIN_VPMULTISHIFTQB512,
31988 IX86_BUILTIN_VPMULTISHIFTQB256,
31989 IX86_BUILTIN_VPMULTISHIFTQB128,
31990 IX86_BUILTIN_VPERMVARQI512_MASK,
31991 IX86_BUILTIN_VPERMT2VARQI512,
31992 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
31993 IX86_BUILTIN_VPERMI2VARQI512,
31994 IX86_BUILTIN_VPERMVARQI256_MASK,
31995 IX86_BUILTIN_VPERMVARQI128_MASK,
31996 IX86_BUILTIN_VPERMT2VARQI256,
31997 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
31998 IX86_BUILTIN_VPERMT2VARQI128,
31999 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
32000 IX86_BUILTIN_VPERMI2VARQI256,
32001 IX86_BUILTIN_VPERMI2VARQI128,
32002
32003 /* SHA builtins. */
32004 IX86_BUILTIN_SHA1MSG1,
32005 IX86_BUILTIN_SHA1MSG2,
32006 IX86_BUILTIN_SHA1NEXTE,
32007 IX86_BUILTIN_SHA1RNDS4,
32008 IX86_BUILTIN_SHA256MSG1,
32009 IX86_BUILTIN_SHA256MSG2,
32010 IX86_BUILTIN_SHA256RNDS2,
32011
32012 /* CLWB instructions. */
32013 IX86_BUILTIN_CLWB,
32014
32015 /* PCOMMIT instructions. */
32016 IX86_BUILTIN_PCOMMIT,
32017
32018 /* CLFLUSHOPT instructions. */
32019 IX86_BUILTIN_CLFLUSHOPT,
32020
32021 /* TFmode support builtins. */
32022 IX86_BUILTIN_INFQ,
32023 IX86_BUILTIN_HUGE_VALQ,
32024 IX86_BUILTIN_FABSQ,
32025 IX86_BUILTIN_COPYSIGNQ,
32026
32027 /* Vectorizer support builtins. */
32028 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
32029 IX86_BUILTIN_CPYSGNPS,
32030 IX86_BUILTIN_CPYSGNPD,
32031 IX86_BUILTIN_CPYSGNPS256,
32032 IX86_BUILTIN_CPYSGNPS512,
32033 IX86_BUILTIN_CPYSGNPD256,
32034 IX86_BUILTIN_CPYSGNPD512,
32035 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
32036 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
32037
32039 /* FMA4 instructions. */
32040 IX86_BUILTIN_VFMADDSS,
32041 IX86_BUILTIN_VFMADDSD,
32042 IX86_BUILTIN_VFMADDPS,
32043 IX86_BUILTIN_VFMADDPD,
32044 IX86_BUILTIN_VFMADDPS256,
32045 IX86_BUILTIN_VFMADDPD256,
32046 IX86_BUILTIN_VFMADDSUBPS,
32047 IX86_BUILTIN_VFMADDSUBPD,
32048 IX86_BUILTIN_VFMADDSUBPS256,
32049 IX86_BUILTIN_VFMADDSUBPD256,
32050
32051 /* FMA3 instructions. */
32052 IX86_BUILTIN_VFMADDSS3,
32053 IX86_BUILTIN_VFMADDSD3,
32054
32055 /* XOP instructions. */
32056 IX86_BUILTIN_VPCMOV,
32057 IX86_BUILTIN_VPCMOV_V2DI,
32058 IX86_BUILTIN_VPCMOV_V4SI,
32059 IX86_BUILTIN_VPCMOV_V8HI,
32060 IX86_BUILTIN_VPCMOV_V16QI,
32061 IX86_BUILTIN_VPCMOV_V4SF,
32062 IX86_BUILTIN_VPCMOV_V2DF,
32063 IX86_BUILTIN_VPCMOV256,
32064 IX86_BUILTIN_VPCMOV_V4DI256,
32065 IX86_BUILTIN_VPCMOV_V8SI256,
32066 IX86_BUILTIN_VPCMOV_V16HI256,
32067 IX86_BUILTIN_VPCMOV_V32QI256,
32068 IX86_BUILTIN_VPCMOV_V8SF256,
32069 IX86_BUILTIN_VPCMOV_V4DF256,
32070
32071 IX86_BUILTIN_VPPERM,
32072
32073 IX86_BUILTIN_VPMACSSWW,
32074 IX86_BUILTIN_VPMACSWW,
32075 IX86_BUILTIN_VPMACSSWD,
32076 IX86_BUILTIN_VPMACSWD,
32077 IX86_BUILTIN_VPMACSSDD,
32078 IX86_BUILTIN_VPMACSDD,
32079 IX86_BUILTIN_VPMACSSDQL,
32080 IX86_BUILTIN_VPMACSSDQH,
32081 IX86_BUILTIN_VPMACSDQL,
32082 IX86_BUILTIN_VPMACSDQH,
32083 IX86_BUILTIN_VPMADCSSWD,
32084 IX86_BUILTIN_VPMADCSWD,
32085
32086 IX86_BUILTIN_VPHADDBW,
32087 IX86_BUILTIN_VPHADDBD,
32088 IX86_BUILTIN_VPHADDBQ,
32089 IX86_BUILTIN_VPHADDWD,
32090 IX86_BUILTIN_VPHADDWQ,
32091 IX86_BUILTIN_VPHADDDQ,
32092 IX86_BUILTIN_VPHADDUBW,
32093 IX86_BUILTIN_VPHADDUBD,
32094 IX86_BUILTIN_VPHADDUBQ,
32095 IX86_BUILTIN_VPHADDUWD,
32096 IX86_BUILTIN_VPHADDUWQ,
32097 IX86_BUILTIN_VPHADDUDQ,
32098 IX86_BUILTIN_VPHSUBBW,
32099 IX86_BUILTIN_VPHSUBWD,
32100 IX86_BUILTIN_VPHSUBDQ,
32101
32102 IX86_BUILTIN_VPROTB,
32103 IX86_BUILTIN_VPROTW,
32104 IX86_BUILTIN_VPROTD,
32105 IX86_BUILTIN_VPROTQ,
32106 IX86_BUILTIN_VPROTB_IMM,
32107 IX86_BUILTIN_VPROTW_IMM,
32108 IX86_BUILTIN_VPROTD_IMM,
32109 IX86_BUILTIN_VPROTQ_IMM,
32110
32111 IX86_BUILTIN_VPSHLB,
32112 IX86_BUILTIN_VPSHLW,
32113 IX86_BUILTIN_VPSHLD,
32114 IX86_BUILTIN_VPSHLQ,
32115 IX86_BUILTIN_VPSHAB,
32116 IX86_BUILTIN_VPSHAW,
32117 IX86_BUILTIN_VPSHAD,
32118 IX86_BUILTIN_VPSHAQ,
32119
32120 IX86_BUILTIN_VFRCZSS,
32121 IX86_BUILTIN_VFRCZSD,
32122 IX86_BUILTIN_VFRCZPS,
32123 IX86_BUILTIN_VFRCZPD,
32124 IX86_BUILTIN_VFRCZPS256,
32125 IX86_BUILTIN_VFRCZPD256,
32126
32127 IX86_BUILTIN_VPCOMEQUB,
32128 IX86_BUILTIN_VPCOMNEUB,
32129 IX86_BUILTIN_VPCOMLTUB,
32130 IX86_BUILTIN_VPCOMLEUB,
32131 IX86_BUILTIN_VPCOMGTUB,
32132 IX86_BUILTIN_VPCOMGEUB,
32133 IX86_BUILTIN_VPCOMFALSEUB,
32134 IX86_BUILTIN_VPCOMTRUEUB,
32135
32136 IX86_BUILTIN_VPCOMEQUW,
32137 IX86_BUILTIN_VPCOMNEUW,
32138 IX86_BUILTIN_VPCOMLTUW,
32139 IX86_BUILTIN_VPCOMLEUW,
32140 IX86_BUILTIN_VPCOMGTUW,
32141 IX86_BUILTIN_VPCOMGEUW,
32142 IX86_BUILTIN_VPCOMFALSEUW,
32143 IX86_BUILTIN_VPCOMTRUEUW,
32144
32145 IX86_BUILTIN_VPCOMEQUD,
32146 IX86_BUILTIN_VPCOMNEUD,
32147 IX86_BUILTIN_VPCOMLTUD,
32148 IX86_BUILTIN_VPCOMLEUD,
32149 IX86_BUILTIN_VPCOMGTUD,
32150 IX86_BUILTIN_VPCOMGEUD,
32151 IX86_BUILTIN_VPCOMFALSEUD,
32152 IX86_BUILTIN_VPCOMTRUEUD,
32153
32154 IX86_BUILTIN_VPCOMEQUQ,
32155 IX86_BUILTIN_VPCOMNEUQ,
32156 IX86_BUILTIN_VPCOMLTUQ,
32157 IX86_BUILTIN_VPCOMLEUQ,
32158 IX86_BUILTIN_VPCOMGTUQ,
32159 IX86_BUILTIN_VPCOMGEUQ,
32160 IX86_BUILTIN_VPCOMFALSEUQ,
32161 IX86_BUILTIN_VPCOMTRUEUQ,
32162
32163 IX86_BUILTIN_VPCOMEQB,
32164 IX86_BUILTIN_VPCOMNEB,
32165 IX86_BUILTIN_VPCOMLTB,
32166 IX86_BUILTIN_VPCOMLEB,
32167 IX86_BUILTIN_VPCOMGTB,
32168 IX86_BUILTIN_VPCOMGEB,
32169 IX86_BUILTIN_VPCOMFALSEB,
32170 IX86_BUILTIN_VPCOMTRUEB,
32171
32172 IX86_BUILTIN_VPCOMEQW,
32173 IX86_BUILTIN_VPCOMNEW,
32174 IX86_BUILTIN_VPCOMLTW,
32175 IX86_BUILTIN_VPCOMLEW,
32176 IX86_BUILTIN_VPCOMGTW,
32177 IX86_BUILTIN_VPCOMGEW,
32178 IX86_BUILTIN_VPCOMFALSEW,
32179 IX86_BUILTIN_VPCOMTRUEW,
32180
32181 IX86_BUILTIN_VPCOMEQD,
32182 IX86_BUILTIN_VPCOMNED,
32183 IX86_BUILTIN_VPCOMLTD,
32184 IX86_BUILTIN_VPCOMLED,
32185 IX86_BUILTIN_VPCOMGTD,
32186 IX86_BUILTIN_VPCOMGED,
32187 IX86_BUILTIN_VPCOMFALSED,
32188 IX86_BUILTIN_VPCOMTRUED,
32189
32190 IX86_BUILTIN_VPCOMEQQ,
32191 IX86_BUILTIN_VPCOMNEQ,
32192 IX86_BUILTIN_VPCOMLTQ,
32193 IX86_BUILTIN_VPCOMLEQ,
32194 IX86_BUILTIN_VPCOMGTQ,
32195 IX86_BUILTIN_VPCOMGEQ,
32196 IX86_BUILTIN_VPCOMFALSEQ,
32197 IX86_BUILTIN_VPCOMTRUEQ,
32198
32199 /* LWP instructions. */
32200 IX86_BUILTIN_LLWPCB,
32201 IX86_BUILTIN_SLWPCB,
32202 IX86_BUILTIN_LWPVAL32,
32203 IX86_BUILTIN_LWPVAL64,
32204 IX86_BUILTIN_LWPINS32,
32205 IX86_BUILTIN_LWPINS64,
32206
32207 IX86_BUILTIN_CLZS,
32208
32209 /* RTM */
32210 IX86_BUILTIN_XBEGIN,
32211 IX86_BUILTIN_XEND,
32212 IX86_BUILTIN_XABORT,
32213 IX86_BUILTIN_XTEST,
32214
32215 /* MPX */
32216 IX86_BUILTIN_BNDMK,
32217 IX86_BUILTIN_BNDSTX,
32218 IX86_BUILTIN_BNDLDX,
32219 IX86_BUILTIN_BNDCL,
32220 IX86_BUILTIN_BNDCU,
32221 IX86_BUILTIN_BNDRET,
32222 IX86_BUILTIN_BNDNARROW,
32223 IX86_BUILTIN_BNDINT,
32224 IX86_BUILTIN_SIZEOF,
32225 IX86_BUILTIN_BNDLOWER,
32226 IX86_BUILTIN_BNDUPPER,
32227
32228 /* BMI instructions. */
32229 IX86_BUILTIN_BEXTR32,
32230 IX86_BUILTIN_BEXTR64,
32231 IX86_BUILTIN_CTZS,
32232
32233 /* TBM instructions. */
32234 IX86_BUILTIN_BEXTRI32,
32235 IX86_BUILTIN_BEXTRI64,
32236
32237 /* BMI2 instructions. */
32238 IX86_BUILTIN_BZHI32,
32239 IX86_BUILTIN_BZHI64,
32240 IX86_BUILTIN_PDEP32,
32241 IX86_BUILTIN_PDEP64,
32242 IX86_BUILTIN_PEXT32,
32243 IX86_BUILTIN_PEXT64,
32244
32245 /* ADX instructions. */
32246 IX86_BUILTIN_ADDCARRYX32,
32247 IX86_BUILTIN_ADDCARRYX64,
32248
32249 /* SBB instructions. */
32250 IX86_BUILTIN_SBB32,
32251 IX86_BUILTIN_SBB64,
32252
32253 /* FSGSBASE instructions. */
32254 IX86_BUILTIN_RDFSBASE32,
32255 IX86_BUILTIN_RDFSBASE64,
32256 IX86_BUILTIN_RDGSBASE32,
32257 IX86_BUILTIN_RDGSBASE64,
32258 IX86_BUILTIN_WRFSBASE32,
32259 IX86_BUILTIN_WRFSBASE64,
32260 IX86_BUILTIN_WRGSBASE32,
32261 IX86_BUILTIN_WRGSBASE64,
32262
32263 /* RDRND instructions. */
32264 IX86_BUILTIN_RDRAND16_STEP,
32265 IX86_BUILTIN_RDRAND32_STEP,
32266 IX86_BUILTIN_RDRAND64_STEP,
32267
32268 /* RDSEED instructions. */
32269 IX86_BUILTIN_RDSEED16_STEP,
32270 IX86_BUILTIN_RDSEED32_STEP,
32271 IX86_BUILTIN_RDSEED64_STEP,
32272
32273 /* F16C instructions. */
32274 IX86_BUILTIN_CVTPH2PS,
32275 IX86_BUILTIN_CVTPH2PS256,
32276 IX86_BUILTIN_CVTPS2PH,
32277 IX86_BUILTIN_CVTPS2PH256,
32278
32279 /* MONITORX and MWAITX instructions. */
32280 IX86_BUILTIN_MONITORX,
32281 IX86_BUILTIN_MWAITX,
32282
32283 /* CFString built-in for Darwin. */
32284 IX86_BUILTIN_CFSTRING,
32285
32286 /* Builtins to get CPU type and supported features. */
32287 IX86_BUILTIN_CPU_INIT,
32288 IX86_BUILTIN_CPU_IS,
32289 IX86_BUILTIN_CPU_SUPPORTS,
32290
32291 /* Read/write FLAGS register built-ins. */
32292 IX86_BUILTIN_READ_FLAGS,
32293 IX86_BUILTIN_WRITE_FLAGS,
32294
32295 /* PKU instructions. */
32296 IX86_BUILTIN_RDPKRU,
32297 IX86_BUILTIN_WRPKRU,
32298
32299 IX86_BUILTIN_MAX
32300 };
32301
32302 /* Table for the ix86 builtin decls. */
32303 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
32304
32305 /* Table of all the builtin functions that are possible with different ISAs
32306 but are waiting to be built until a function is declared to use that
32307 ISA. */
32308 struct builtin_isa {
32309 const char *name; /* function name */
32310 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
32311 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
32312 bool const_p; /* true if the declaration is constant */
32313 bool leaf_p; /* true if the declaration has leaf attribute */
32314 bool nothrow_p; /* true if the declaration has nothrow attribute */
32315 bool set_and_not_built_p; /* true if deferred and the decl is not built yet */
32316 };
32317
32318 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
32319
32320 /* Bits that can still enable the inclusion of a deferred builtin. */
32321 static HOST_WIDE_INT deferred_isa_values = 0;
32322
32323 /* Add an ix86 target builtin function with CODE, NAME and TCODE. Save the
32324 MASK of which isa_flags to use in the ix86_builtins_isa array. Store the
32325 function decl in the ix86_builtins array. Return the function decl, or
32326 NULL_TREE if the builtin was not added.
32327
32328 If the front end has a special hook for builtin functions, delay adding
32329 builtin functions that aren't in the current ISA until the ISA is changed
32330 with function specific optimization. Doing so can save about 300K for the
32331 default compiler. When the builtin is expanded, check at that time whether
32332 it is valid.
32333
32334 If the front end doesn't have a special hook, record all builtins, even
32335 those that aren't in the current ISA, in case the user uses function
32336 specific options for a different ISA, so that we don't get scope errors
32337 if a builtin is added in the middle of a function scope. */
32338
32339 static inline tree
32340 def_builtin (HOST_WIDE_INT mask, const char *name,
32341 enum ix86_builtin_func_type tcode,
32342 enum ix86_builtins code)
32343 {
32344 tree decl = NULL_TREE;
32345
32346 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
32347 {
32348 ix86_builtins_isa[(int) code].isa = mask;
32349
32350 mask &= ~OPTION_MASK_ISA_64BIT;
32351 if (mask == 0
32352 || (mask & ix86_isa_flags) != 0
32353 || (lang_hooks.builtin_function
32354 == lang_hooks.builtin_function_ext_scope))
32355
32356 {
32357 tree type = ix86_get_builtin_func_type (tcode);
32358 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32359 NULL, NULL_TREE);
32360 ix86_builtins[(int) code] = decl;
32361 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
32362 }
32363 else
32364 {
32365 /* Only a MASK whose set_and_not_built_p is still true can potentially
32366 enable a deferred builtin later. */
32367 deferred_isa_values |= mask;
32368 ix86_builtins[(int) code] = NULL_TREE;
32369 ix86_builtins_isa[(int) code].tcode = tcode;
32370 ix86_builtins_isa[(int) code].name = name;
32371 ix86_builtins_isa[(int) code].leaf_p = false;
32372 ix86_builtins_isa[(int) code].nothrow_p = false;
32373 ix86_builtins_isa[(int) code].const_p = false;
32374 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
32375 }
32376 }
32377
32378 return decl;
32379 }
32380
32381 /* Like def_builtin, but also marks the function decl "const". */
32382
32383 static inline tree
32384 def_builtin_const (HOST_WIDE_INT mask, const char *name,
32385 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32386 {
32387 tree decl = def_builtin (mask, name, tcode, code);
32388 if (decl)
32389 TREE_READONLY (decl) = 1;
32390 else
32391 ix86_builtins_isa[(int) code].const_p = true;
32392
32393 return decl;
32394 }
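
/* A minimal illustrative sketch (with a hypothetical builtin name and a
   hypothetical IX86_BUILTIN_EXAMPLE enumerator): a registration made from
   the builtin-init code would look roughly like

     def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_example",
			V8QI_FTYPE_V8QI_V8QI, IX86_BUILTIN_EXAMPLE);

   i.e. an ISA mask, the user-visible name, a function-type code such as
   the ones used in the bdesc_* tables below, and the enumerator under
   which the resulting decl is stored in ix86_builtins[].  */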
32395
32396 /* Add any new builtin functions for a given ISA that may not have been
32397 declared yet. This saves a bit of space compared to adding all of the
32398 declarations to the tree up front, whether or not they are ever used. */
32399
32400 static void
32401 ix86_add_new_builtins (HOST_WIDE_INT isa)
32402 {
32403 if ((isa & deferred_isa_values) == 0)
32404 return;
32405
32406 /* The bits in ISA can now be removed from the set of deferred isa values. */
32407 deferred_isa_values &= ~isa;
32408
32409 int i;
32410 tree saved_current_target_pragma = current_target_pragma;
32411 current_target_pragma = NULL_TREE;
32412
32413 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
32414 {
32415 if ((ix86_builtins_isa[i].isa & isa) != 0
32416 && ix86_builtins_isa[i].set_and_not_built_p)
32417 {
32418 tree decl, type;
32419
32420 /* Don't define the builtin again. */
32421 ix86_builtins_isa[i].set_and_not_built_p = false;
32422
32423 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
32424 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
32425 type, i, BUILT_IN_MD, NULL,
32426 NULL_TREE);
32427
32428 ix86_builtins[i] = decl;
32429 if (ix86_builtins_isa[i].const_p)
32430 TREE_READONLY (decl) = 1;
32431 if (ix86_builtins_isa[i].leaf_p)
32432 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32433 NULL_TREE);
32434 if (ix86_builtins_isa[i].nothrow_p)
32435 TREE_NOTHROW (decl) = 1;
32436 }
32437 }
32438
32439 current_target_pragma = saved_current_target_pragma;
32440 }
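
/* A rough, hypothetical usage sketch of the deferral scheme: with the
   default command-line ISA an AVX2 builtin stays deferred, and only
   becomes usable once a function switches ISAs, e.g.

     __attribute__ ((target ("avx2")))
     __m256i
     load_masked (__m256i const *p, __m256i m)
     {
       return _mm256_maskload_epi32 ((int const *) p, m);
     }

   at which point the ISA change passed to this routine includes the AVX2
   bit and the matching deferred decls are built in extern scope above.  */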
32441
32442 /* Bits for builtin_description.flag. */
32443
32444 /* Set when we don't support the comparison natively, and should
32445 swap the comparison operands in order to support it. */
32446 #define BUILTIN_DESC_SWAP_OPERANDS 1
32447
32448 struct builtin_description
32449 {
32450 const HOST_WIDE_INT mask;	/* OPTION_MASK_ISA_* flags this entry is enabled for */
32451 const enum insn_code icode;	/* insn pattern used to expand it, or CODE_FOR_nothing */
32452 const char *const name;	/* user-visible __builtin_ia32_* name */
32453 const enum ix86_builtins code;	/* this builtin's ix86_builtins enumerator */
32454 const enum rtx_code comparison;	/* rtx comparison code, or UNKNOWN */
32455 const int flag;	/* table-specific data: BUILTIN_DESC_* flags, a CC mode, or an (int) function-type code */
32456 };
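
/* Reading the tables below: for instance, the bdesc_comi row for
   "__builtin_ia32_comieq" is enabled by OPTION_MASK_ISA_SSE, expands
   through the CODE_FOR_sse_comi pattern, is stored under
   IX86_BUILTIN_COMIEQSS, and uses the UNEQ comparison code.  */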
32457
32458 static const struct builtin_description bdesc_comi[] =
32459 {
32460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
32461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
32462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
32463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
32464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
32465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
32466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
32467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
32468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
32469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
32470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
32471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
32472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
32473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
32474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
32475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
32476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
32477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
32478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
32479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
32480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
32481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
32482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
32483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
32484 };
32485
32486 static const struct builtin_description bdesc_pcmpestr[] =
32487 {
32488 /* SSE4.2 */
32489 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
32490 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
32491 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
32492 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
32493 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
32494 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
32495 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
32496 };
32497
32498 static const struct builtin_description bdesc_pcmpistr[] =
32499 {
32500 /* SSE4.2 */
32501 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
32502 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
32503 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
32504 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
32505 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
32506 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
32507 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
32508 };
32509
32510 /* Special builtins with variable number of arguments. */
32511 static const struct builtin_description bdesc_special_args[] =
32512 {
32513 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
32514 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
32515 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
32516
32517 /* 80387 (for use internally for atomic compound assignment). */
32518 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
32519 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
32520 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
32521 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
32522
32523 /* MMX */
32524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32525
32526 /* 3DNow! */
32527 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32528
32529 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
32530 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
32531 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
32532 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32533 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32534 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32535 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32536 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32537 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32538
32539 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32540 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32541 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32542 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32543 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32544 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32545 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32546 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32547
32548 /* SSE */
32549 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32552
32553 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32554 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32555 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32556 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32557
32558 /* SSE or 3DNow!A */
32559 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32560 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
32561
32562 /* SSE2 */
32563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
32567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
32569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
32570 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
32571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
32572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32573
32574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32576
32577 /* SSE3 */
32578 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32579
32580 /* SSE4.1 */
32581 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
32582
32583 /* SSE4A */
32584 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32585 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32586
32587 /* AVX */
32588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
32589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
32590
32591 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32592 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32593 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
32595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
32596
32597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
32603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32604
32605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
32606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32608
32609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
32610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
32611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
32612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
32613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
32614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
32615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
32616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
32617
32618 /* AVX2 */
32619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
32620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
32621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
32622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
32623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
32624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
32625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
32626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
32627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
32628
32629 /* AVX512F */
32630 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32631 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32632 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32633 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32634 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32635 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32636 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32637 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32638 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32639 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32640 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32648 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32649 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
32651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
32652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
32653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
32654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32660 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32662 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32664 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32668 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32670 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32677
32678 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
32679 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
32680 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
32681 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
32682 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
32683 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
32684
32685 /* FSGSBASE */
32686 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32687 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32688 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32689 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32690 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32691 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32692 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32693 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32694
32695 /* RTM */
32696 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32697 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
32698 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
32699
32700 /* AVX512BW */
32701 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
32702 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
32703 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
32704 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
32705
32706 /* AVX512VL */
32707 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
32708 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
32709 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
32710 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32743 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
32744 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
32745 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
32746 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32801
32802 /* PCOMMIT. */
32803 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
32804
32805 /* RDPKRU and WRPKRU. */
32806 { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32807 { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }
32808 };
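
/* A small illustrative sketch (hypothetical user code) for one of the
   entries above: __builtin_ia32_rdtsc has type code UINT64_FTYPE_VOID,
   i.e.

     unsigned long long
     read_tsc (void)
     {
       return __builtin_ia32_rdtsc ();
     }

   the FTYPE codes in the last column spell out each prototype as the
   return type followed by the parameter types.  */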
32809
32810 /* Builtins with variable number of arguments. */
32811 static const struct builtin_description bdesc_args[] =
32812 {
32813 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
32814 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
32815 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
32816 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32817 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32818 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32819 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32820
32821 /* MMX */
32822 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32823 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32824 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32825 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32827 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32828
32829 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32830 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32831 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32832 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32834 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32836 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32837
32838 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32839 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32840
32841 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32842 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32843 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32844 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32845
32846 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32847 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32848 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32849 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32850 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32851 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32852
32853 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32854 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32855 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32856 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32857 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32858 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32859
32860 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32861 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
32862 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32863
32864 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
32865
32866 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32867 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32868 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32869 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32870 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32871 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32872
32873 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32874 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32875 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32876 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32878 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32879
32880 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32881 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32882 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32883 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
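/* The shift rows come in two flavours: ..._SI_COUNT prototypes take a
   scalar int count (psllwi, pslldi, ...), while ..._V*_COUNT prototypes
   take the count in a vector operand (psllw, pslld, ...).  A minimal
   sketch of the former (hypothetical helper name; compile with -mmmx):

     typedef short __v4hi __attribute__ ((__vector_size__ (8)));

     __v4hi shl3 (__v4hi a)
     {
       return __builtin_ia32_psllwi (a, 3);
     }

   shifts each 16-bit element left by three.  */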
32884
32885 /* 3DNow! */
32886 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32887 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32888 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32889 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32890
32891 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32892 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32893 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32894 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32895 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32896 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32897 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32898 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32899 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32900 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32901 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32902 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32903 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32904 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32905 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
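/* Illustrative sketch for the 3DNow! rows (hypothetical helper name;
   compile with -m3dnow):

     typedef float __v2sf __attribute__ ((__vector_size__ (8)));

     __v2sf add_pair (__v2sf a, __v2sf b)
     {
       return __builtin_ia32_pfadd (a, b);
     }

   The same V2SF_FTYPE_V2SF_V2SF shape covers pfsub, pfmul, pfmin,
   pfmax and the other two-operand 3DNow! builtins above.  */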
32906
32907 /* 3DNow!A */
32908 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32909 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32910 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
32911 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32912 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32913 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32914
32915 /* SSE */
32916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
32917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32918 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32919 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32920 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
32923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
32924 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
32925 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
32926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
32927 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
32928
32929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32930
32931 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32932 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32933 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32939
32940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
32941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
32942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
32943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
32947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
32948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
32949 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
32951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
32953 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
32954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
32955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
32957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
32958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
32959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
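/* Most of the SSE compare rows reuse a single insn pattern
   (sse_maskcmpv4sf3 / sse_vmmaskcmpv4sf3) and differ only in the rtx
   comparison code; the ..._SWAP prototypes additionally swap the two
   operands, so cmpgtps is emitted as a less-than compare with reversed
   inputs.  A minimal sketch (hypothetical helper name; compile with
   -msse):

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf greater (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_cmpgtps (a, b);
     }

   The result is a per-element all-ones/all-zeros mask in V4SF form.  */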
32960
32961 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32962 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32963 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32964 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32965
32966 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32967 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32968 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32969 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32970
32971 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32972
32973 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32976 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32977 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32978
32979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
32980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
32981 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
32982
32983 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
32984
32985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
32986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
32987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
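/* The ..._VEC_MERGE rows model the scalar SSE forms: the operation is
   applied to element 0 and the remaining elements of the single operand
   pass through unchanged.  A minimal sketch (hypothetical helper name;
   compile with -msse):

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf sqrt_low (__v4sf a)
     {
       return __builtin_ia32_sqrtss (a);
     }

   computes sqrt of element 0 and copies elements 1..3 through.  */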
32988
32989 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
32990 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
32991
32992 /* SSE MMX or 3DNow!A */
32993 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32994 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32995 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32996
32997 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32998 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32999 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33000 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33001
33002 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
33003 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
33004
33005 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
33006
33007 /* SSE2 */
33008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33009
33010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
33011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
33012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
33014 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
33015
33016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
33019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33021
33022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
33023
33024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33026 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33027 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
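/* Illustrative sketch for the scalar conversion rows (hypothetical
   helper name; compile with -msse2):

     typedef double __v2df __attribute__ ((__vector_size__ (16)));

     int trunc_low (__v2df a)
     {
       return __builtin_ia32_cvttsd2si (a);
     }

   cvtsd2si rounds element 0 according to MXCSR, while the cvtt* forms
   truncate toward zero.  */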
33028
33029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
33031 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33032
33033 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33034 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33035 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33036 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33041
33042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33062
33063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33064 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33067
33068 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33070 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33071 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33072
33073 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33074
33075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33076 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33077 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33078
33079 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33080
33081 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33082 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33083 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33084 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33085 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33086 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33087 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33088 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33089
33090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33098
33099 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33100 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33101
33102 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33104 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33105 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33106
33107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33109
33110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33116
33117 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33118 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33119 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33121
33122 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33123 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33124 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33125 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33126 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33127 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33128 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33129 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33130
33131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33134
33135 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
33137
33138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
33139 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33140
33141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
33142
33143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
33144 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
33145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
33146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
33147
33148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33149 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33150 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33151 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33152 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33153 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33154 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
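/* As with the MMX shifts, ..._SI_COUNT rows take a scalar count and
   ..._V*_COUNT rows take the count in a vector.  The ..._INT_CONVERT
   rows (pslldqi128 here, psrldqi128 below) shift the whole 128-bit
   value; the emmintrin.h wrappers pass that count in bits, i.e. the
   byte count scaled by 8.  A minimal sketch (hypothetical helper name;
   compile with -msse2):

     typedef short __v8hi __attribute__ ((__vector_size__ (16)));

     __v8hi shl5 (__v8hi a)
     {
       return __builtin_ia32_psllwi128 (a, 5);
     }

   shifts each 16-bit element left by five.  */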
33155
33156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33157 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33158 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33159 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33160 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33161 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33162 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33163
33164 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33165 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33166 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33167 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33168
33169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
33170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33172
33173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
33174
33175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33176
33177 /* SSE2 MMX */
33178 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33179 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33180
33181 /* SSE3 */
33182 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33183 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33184
33185 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33186 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33187 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33188 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33189 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33190 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33191
33192 /* SSSE3 */
33193 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33194 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
33195 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33196 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
33197 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33198 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
33199
33200 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33201 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33202 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33203 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33204 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33205 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33206 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33207 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33208 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33209 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33210 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33211 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33212 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
33213 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
33214 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33215 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33216 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33217 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33218 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33219 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33220 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33221 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33222 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33223 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33224
33225 /* SSSE3. */
33226 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
33227 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
33228
33229 /* SSE4.1 */
33230 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33231 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33232 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
33233 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
33234 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33235 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33236 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33237 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
33238 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
33239 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
33240
33241 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33242 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33243 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33244 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33245 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33246 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33247 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33248 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33249 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33250 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33251 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33252 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33253 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33254
33255 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33256 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33257 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33258 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33259 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33260 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33261 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33262 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33263 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33264 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33265 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33266 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33267
33268 /* SSE4.1 */
33269 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33270 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33271 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33272 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33273
33274 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
33275 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
33276 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
33277 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
33278
33279 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33280 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33281
33282 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33283 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33284
33285 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
33286 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
33287 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
33288 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
33289
33290 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
33291 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
33292
33293 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33294 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33295
33296 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33297 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33298 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
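/* The three ptest rows share one insn pattern; the comparison field
   selects which PTEST result the builtin returns (EQ reads ZF, LTU
   reads CF, GTU reads the "neither flag set" outcome).  A minimal
   sketch (hypothetical helper name; compile with -msse4.1):

     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     int all_zero_under_mask (__v2di mask, __v2di val)
     {
       return __builtin_ia32_ptestz128 (mask, val);
     }

   which returns 1 exactly when (mask & val) has no bits set.  */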
33299
33300 /* SSE4.2 */
33301 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33302 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
33303 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
33304 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33305 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
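/* Illustrative sketch for the crc32 rows (hypothetical helper name;
   compile with -msse4.2):

     unsigned int crc32_step (unsigned int crc, unsigned int data)
     {
       return __builtin_ia32_crc32si (crc, data);
     }

   The qi/hi/si/di variants differ only in the width of the data operand
   (and, for crc32di, in the 64-bit accumulator type of the builtin's
   prototype).  */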
33306
33307 /* SSE4A */
33308 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
33309 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
33310 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
33311 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33312
33313 /* AES */
33314 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
33315 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33316
33317 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33318 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33319 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33320 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33321
33322 /* PCLMUL */
33323 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
33324
33325 /* AVX */
33326 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33327 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33330 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33331 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33334 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33340 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33341 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33342 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33343 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33344 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33345 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33346 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33347 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33348 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33349 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33350 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33351 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33352
33353 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
33354 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
33355 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
33356 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33357
33358 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33359 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33360 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
33361 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
33362 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33365 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33368 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
33372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
33373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
33374 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
33375 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
33376 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
33377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33378 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
33379 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33381 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33385 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
33390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
33391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
33392
33393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33396
33397 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33399 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33400 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33401 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33402
33403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33404
33405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33406 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33407
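/* For the floor/ceil/trunc/rint variants below, the rounding mode is
   carried in the sub-code field (ROUND_FLOOR, ROUND_CEIL,
   ROUND_TRUNC, ROUND_MXCSR) and is emitted as the rounding-control
   immediate of the underlying vroundpd/vroundps instruction.  */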
33408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
33409 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
33410 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
33411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
33412
33413 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33414 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33415
33416 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33418
33419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
33420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
33421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
33422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
33423
33424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
33425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
33426
33427 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33428 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33429
33430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33434
33435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33438 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
33439 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
33440 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
33441
33442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33447 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33448 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33449 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33457
33458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
33459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
33460
33461 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33462 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33463
33464 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33465
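/* In the AVX2 entries, prototypes ending in _COUNT mark shift
   builtins whose count operand may be an immediate or a register,
   while _CONVERT marks builtins whose operands the expander
   reinterprets in another mode (e.g. the V2TImode palignr and
   whole-register shift patterns taking V4DImode arguments).  */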
33466 /* AVX2 */
33467 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
33468 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
33469 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
33470 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
33471 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33472 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33473 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33474 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33475 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33476 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33477 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33478 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33479 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33480 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33481 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33482 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33483 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
33484 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33485 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33486 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33487 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33488 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
33489 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
33490 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33491 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33492 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33493 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33494 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33495 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33496 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33497 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33498 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33499 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33500 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33501 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33502 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33503 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33504 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33505 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
33506 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33507 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33508 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33509 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33510 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33511 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33512 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33513 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33514 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33515 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33516 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33517 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33518 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
33519 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33520 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33521 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33522 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33523 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33524 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33525 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33526 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33527 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33528 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33529 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33530 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33531 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33532 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33533 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33534 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33535 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33536 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33537 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33538 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33539 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33540 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33541 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
33542 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33543 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33544 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33545 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33546 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33547 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33548 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33549 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33550 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33551 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33552 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33553 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33554 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33555 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33556 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33557 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33558 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33559 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33560 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33561 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33562 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33563 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33564 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33565 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33566 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33567 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33568 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33569 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33570 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33571 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33572 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33573 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33574 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33581 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33582 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33583 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33584 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33585 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33586 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33587 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33588 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
33589 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
33590 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33591 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33592 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33595 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33596 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33597 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
33600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
33601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
33602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
33603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33612 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33613
33614 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33615
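/* The scalar bit-manipulation builtins (LZCNT above, BMI, TBM and
   BMI2 below) operate on general-purpose registers rather than
   vector registers.  */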
33616 /* BMI */
33617 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33618 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33619 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33620
33621 /* TBM */
33622 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33623 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33624
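/* For vcvtps2ph the trailing _INT operand is the rounding-control
   immediate; vcvtph2ps takes none, as the half-to-single conversion
   is exact.  */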
33625 /* F16C */
33626 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
33627 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
33628 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
33629 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
33630
33631 /* BMI2 */
33632 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33633 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33634 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33635 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33636 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33637 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
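/* Illustrative use of the pdep/pext builtins above, normally reached
   through the <bmi2intrin.h> wrappers (wrapper names assumed here):

     scattered = _pdep_u32 (bits, mask);   deposit the low bits of
                                           BITS at the positions of
                                           the set bits in MASK
     gathered  = _pext_u32 (word, mask);   collect the bits of WORD
                                           selected by MASK into the
                                           low bits of the result  */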
33638
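/* The AVX-512 entries follow the usual masking convention: the _mask
   patterns take a merge source and a UQI/UHI mask as the trailing
   prototype operands (elements whose mask bit is clear keep the
   merge source), while the _maskz patterns zero those elements
   instead.  */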
33639 /* AVX512F */
33640 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
33641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
33642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
33643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
33644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
33645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
33646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33648 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33649 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
33654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
33656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33660 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33662 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
33664 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
33666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33668 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33670 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
33671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
33672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
33673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
33674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
33675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
33676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
33677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
33678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
33695 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33696 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
33699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33706 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33806 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33807 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33808 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33809 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33836
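  /* The entries below map to non-masked insn patterns.  As in the rest of
     this table, each row lists the ISA mask that must be enabled, the insn
     pattern, the builtin's name, its IX86_BUILTIN_* code, an optional rtx
     (rounding) code or UNKNOWN, and the builtin's prototype.  */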
33837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
33838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
33839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
33840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33841 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
33843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33845
33846 /* Mask arithmetic operations */
33847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
33850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
33857
33858 /* SHA */
33859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
33866
33867 /* AVX512VL. */
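  /* These cover the 128-bit and 256-bit (xmm/ymm) forms; entries that also
     depend on AVX512BW or AVX512DQ OR the extra ISA flag into the mask.  */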
33868 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
33869 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
33870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33878 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
33879 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33880 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
33881 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33906 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33907 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33908 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33909 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33910 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33911 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33912 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33913 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33914 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33915 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33916 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33917 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
33918 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
33922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
33923 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
33924 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
33925 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33926 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33927 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33928 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33929 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33930 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33931 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33932 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33935 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33936 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33937 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33938 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
33958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
33959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
33960 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
33961 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
33963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
33964 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
33965 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33966 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
33967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
33969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
33971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
33973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
33975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
33978 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
33979 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
33980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
33981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
33982 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
33983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
33984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
33985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
33986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
33987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
33988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
33989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
33990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
33991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
33992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
33993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
33994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
33995 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
33996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
33997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
33998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
33999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34006 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34007 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34008 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34009 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34010 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34011 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34014 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34015 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34016 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34032 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34033 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34036 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34037 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34040 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34041 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34042 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34045 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34046 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34047 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34048 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34049 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34052 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34053 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34055 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34056 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34057 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34060 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34061 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34062 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34063 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34069 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34070 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34071 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34072 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34073 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34104 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34105 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34106 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34107 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34126 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34127 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34128 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34129 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34130 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34131 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34132 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34133 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34134 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34135 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34136 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34137 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34138 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34139 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34140 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34141 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34186 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34187 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34188 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34189 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34250 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
34251 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
34252 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34253 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
34255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
34256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
34257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
34258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34259 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34264 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34265 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34266 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34267 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34278 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34279 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34280 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34281 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34282 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34283 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34284 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34285 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34349 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
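 /* AVX512DQ VFPCLASS: classify packed and scalar FP values by category (NaN, infinity, zero, denormal, negative) into a mask. */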
34358 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
34359 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
34360 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
34361 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
34362 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
34363 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
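 /* Vector <-> mask register conversions: VPMOV{B,W,D,Q}2M and VPMOVM2{B,W,D,Q}. */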
34364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
34365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
34366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
34367 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
34368 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
34369 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
34370 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
34371 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
34372 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
34373 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
34374 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
34375 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
34376 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
34377 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
34378 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34379 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
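 /* Masked integer compares (equal and signed greater-than) producing mask registers. */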
34380 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34381 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34382 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34383 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34388 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34389 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34390 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34391 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
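 /* VPTESTM / VPTESTNM: per-element AND test, setting the mask bit on a nonzero / zero result. */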
34396 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34397 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34398 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34399 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34404 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34405 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34406 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
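 /* AVX512CD/VL VPBROADCASTMB2Q and VPBROADCASTMW2D: broadcast a mask register into vector elements. */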
34412 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34413 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34414 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
34415 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
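 /* Masked compress and expand; the _maskz expand variants zero the inactive elements. */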
34416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
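 /* Masked signed and unsigned integer min/max. */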
34440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34456 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34457 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34458 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34459 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34460 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34461 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34462 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34463 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34464 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34465 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34466 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34467 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34468 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34469 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
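 /* AVX512CD/VL VPCONFLICT (duplicate detection) and VPLZCNT (leading zero count), plus masked FP unpack high/low. */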
34472 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34473 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34474 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34475 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34483 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34484 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34485 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34486 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
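 /* VALIGND / VALIGNQ: concatenate two sources and shift right by a whole number of elements. */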
34488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
34491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
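 /* Masked half-precision conversions, VCVTPS2PH and VCVTPH2PS. */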
34492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
34493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
34494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
34495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
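 /* Masked integer unpacks (interleave high/low of dwords, qwords, bytes and words). */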
34496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
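 /* VPSLLVW variable word shifts, saturating dword-to-word packs, and byte/word averages. */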
34512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34521 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
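 /* Masked FP permutes (VPERMPS, VPERMPD, VPERMILPS/PD) and byte/word VPABS. */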
34522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34527 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
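 /* VBLENDM / VPBLENDM: per-element select between two sources under a mask
    (used by wrappers such as _mm256_mask_blend_epi32 in avx512vlintrin.h). */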
34537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
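 /* Masked multiplies: VPMULLD and the widening even-element VPMULDQ / VPMULUDQ. */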
34549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
34556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
34557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
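 /* VPCMP / VPCMPU and VCMPPD / VCMPPS with an immediate predicate, producing mask registers. */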
34560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
34569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
34570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
34579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
34580
34581 /* AVX512DQ. */
34582 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34583 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34584 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
34585 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
34586 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
34587 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
34588 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
34589 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
34590 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
34591 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
34592 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34593 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
34594 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34595 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34596 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34597 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34598 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34599 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34600 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34601 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34602 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34603 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
34604 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
34605 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
34606 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
34607 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
34608 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
34609 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
34610 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
34611 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
34612 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
34613
34614 /* AVX512BW. */
34615 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
34616 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
34617 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34618 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34619 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34620 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34621 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
34622 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
34623 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34624 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34625 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
34626 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
34627 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
34628 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
34629 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
34630 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
34631 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34632 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34633 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34634 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34635 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34636 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34637 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34638 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
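 /* 512-bit byte/word add and subtract, including signed and unsigned saturating forms. */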
34639 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34640 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34641 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34642 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34643 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34644 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34645 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34646 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34647 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34648 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34649 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34650 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34651 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34652 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34653 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34654 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34655 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34656 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34657 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34658 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
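 /* Word-to-byte down-conversions: truncating (VPMOVWB), signed saturating (VPMOVSWB) and unsigned saturating (VPMOVUSWB). */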
34659 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34660 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34661 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34662 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34663 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34664 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34665 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34666 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34667 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34668 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34669 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34670 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34671 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
34672 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
34673 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34674 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34675 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34676 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34677 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34678 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34679 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34680 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34681 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34682 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34683 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34684 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34685 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
34686 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
34687 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
34688 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
34689 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34690 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34691 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34692 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34693 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34694 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34695 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34696 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34697 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34698 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34699 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34700 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34701 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34702 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34703 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34704 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34705 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34706
34707 /* AVX512IFMA */
34708 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34709 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34710 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34711 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34712 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34713 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34714 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34715 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34716 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34717 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34718 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34719 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34720
34721 /* AVX512VBMI */
34722 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34723 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34724 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34725 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34726 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34727 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34728 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34729 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34730 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34731 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34732 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34733 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34734 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34735 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34736 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34737 };
34738
34739 /* Builtins with rounding support. */
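/* Each entry below uses the builtin_description layout seen elsewhere in
   this file: the ISA option mask, the insn code, the builtin's name, its
   IX86_BUILTIN_* code, an rtx comparison code (UNKNOWN when no comparison
   is involved) and the function-type flag.  The rounding builtins take one
   extra operand, the rounding mode, which shows up as the trailing _INT in
   their function types.  */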
34740 static const struct builtin_description bdesc_round_args[] =
34741 {
34742 /* AVX512F */
34743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
34748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
34749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
34750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
34751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
34752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
34753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
34756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
34758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
34760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
34762 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
34763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
34764 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
34765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
34766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34771 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
34772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
34773 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
34774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34823 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34825 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34827 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34829 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34831 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34833 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34835 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34837 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
34845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
34846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34862
34863 /* AVX512ER */
34864 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34865 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34866 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34867 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34868 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34869 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34870 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34871 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34872 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34873 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34874
34875 /* AVX512DQ. */
34876 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34877 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34878 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34879 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34880 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34881 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34884 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34885 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34886 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
34891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
34892 };
34893
34894 /* Builtins for MPX. */
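/* The insn code is left as zero in these entries; the MPX builtins are not
   tied to a single named insn pattern and are instead expanded specially by
   the builtin expander later in this file.  */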
34895 static const struct builtin_description bdesc_mpx[] =
34896 {
34897 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
34898 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34899 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34900 };
34901
34902 /* Const builtins for MPX. */
34903 static const struct builtin_description bdesc_mpx_const[] =
34904 {
34905 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
34906 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
34907 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
34908 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
34909 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
34910 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
34911 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
34912 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
34913 };
34914
34915 /* FMA4 and XOP. */
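/* The MULTI_ARG_* macros are shorthand for function-type codes: the digit
   gives the number of source operands, the letters name the element mode
   (SF/DF/SI/DI/HI/QI), a trailing 2 marks the 256-bit form, and the _IMM,
   _CMP and _TF suffixes mark the immediate and comparison variants.  */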
34916 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
34917 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
34918 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
34919 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
34920 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
34921 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
34922 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
34923 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
34924 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
34925 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
34926 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
34927 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
34928 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
34929 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
34930 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
34931 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
34932 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
34933 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
34934 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
34935 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
34936 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
34937 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
34938 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
34939 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
34940 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
34941 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
34942 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
34943 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
34944 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
34945 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
34946 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
34947 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
34948 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
34949 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
34950 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
34951 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
34952 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
34953 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
34954 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
34955 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
34956 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
34957 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
34958 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
34959 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
34960 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
34961 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
34962 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
34963 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
34964 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
34965 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
34966 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
34967 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
34968
34969 static const struct builtin_description bdesc_multi_arg[] =
34970 {
34971 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
34972 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
34973 UNKNOWN, (int)MULTI_ARG_3_SF },
34974 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
34975 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
34976 UNKNOWN, (int)MULTI_ARG_3_DF },
34977
34978 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
34979 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
34980 UNKNOWN, (int)MULTI_ARG_3_SF },
34981 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
34982 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
34983 UNKNOWN, (int)MULTI_ARG_3_DF },
34984
34985 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
34986 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
34987 UNKNOWN, (int)MULTI_ARG_3_SF },
34988 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
34989 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
34990 UNKNOWN, (int)MULTI_ARG_3_DF },
34991 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
34992 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
34993 UNKNOWN, (int)MULTI_ARG_3_SF2 },
34994 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
34995 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
34996 UNKNOWN, (int)MULTI_ARG_3_DF2 },
34997
34998 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
34999 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
35000 UNKNOWN, (int)MULTI_ARG_3_SF },
35001 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
35002 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
35003 UNKNOWN, (int)MULTI_ARG_3_DF },
35004 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
35005 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
35006 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35007 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
35008 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
35009 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35010
35011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
35012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
35013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
35014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
35015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
35016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
35017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
35018
35019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
35022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
35023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
35024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
35025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
35026
35027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
35028
35029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35041
35042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
35044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
35045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
35046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
35047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
35048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
35049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
35050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
35052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
35053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
35054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
35056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
35057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
35058
35059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
35060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
35061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
35062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
35063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
35064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
35065
35066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35081
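/* XOP integer compares.  The rtx code in the fifth field selects the
   condition used when the builtin is expanded; the *neq* spellings are
   simply aliases that map to the same IX86_BUILTIN_VPCOMNE* builtin as
   the *ne* forms.  */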
35082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
35083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
35086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
35087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
35088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
35089
35090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
35091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
35094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
35095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
35096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
35097
35098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
35099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
35102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
35103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
35104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
35105
35106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
35110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
35111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
35112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
35113
35114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
35115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
35118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
35119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
35120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
35121
35122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
35123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
35126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
35127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
35128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
35129
35130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
35131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
35134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
35135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
35136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
35137
35138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
35142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
35143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
35144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
35145
35146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35154
35155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35163
35164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
35165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
35166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
35167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
35168
35169 };
35170 \f
35171 /* TM vector builtins. */
35172
35173 /* Reuse the existing x86-specific `struct builtin_description' because
35174 we're lazy. Add casts to make them fit. */
35175 static const struct builtin_description bdesc_tm[] =
35176 {
35177 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35178 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35179 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35180 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35181 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35182 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35183 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35184
35185 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35186 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35187 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35188 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35189 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35190 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35191 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35192
35193 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35194 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35195 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35196 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35197 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35198 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35199 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35200
35201 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
35202 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
35203 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
35204 };
35205
35206 /* TM callbacks. */
35207
35208 /* Return the builtin decl needed to load a vector of TYPE. */
35209
35210 static tree
35211 ix86_builtin_tm_load (tree type)
35212 {
35213 if (TREE_CODE (type) == VECTOR_TYPE)
35214 {
35215 switch (tree_to_uhwi (TYPE_SIZE (type)))
35216 {
35217 case 64:
35218 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
35219 case 128:
35220 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
35221 case 256:
35222 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
35223 }
35224 }
35225 return NULL_TREE;
35226 }
35227
35228 /* Return the builtin decl needed to store a vector of TYPE. */
35229
35230 static tree
35231 ix86_builtin_tm_store (tree type)
35232 {
35233 if (TREE_CODE (type) == VECTOR_TYPE)
35234 {
35235 switch (tree_to_uhwi (TYPE_SIZE (type)))
35236 {
35237 case 64:
35238 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
35239 case 128:
35240 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
35241 case 256:
35242 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
35243 }
35244 }
35245 return NULL_TREE;
35246 }
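/* Illustrative sketch (not part of GCC itself, shown only to clarify how
   these hooks are used): for a 16-byte vector type such as V4SF,
   ix86_builtin_tm_load returns the decl registered for
   BUILT_IN_TM_LOAD_M128, so a transactional read of such a value can be
   instrumented as a single call along the lines of

     v = __builtin__ITM_RM128 (p);

   rather than falling back to the generic per-element instrumentation.
   Types whose size is not 64, 128 or 256 bits fall through and return
   NULL_TREE, leaving them to the generic byte-wise TM instrumentation.  */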
35247 \f
35248 /* Initialize the transactional memory vector load/store builtins. */
35249
35250 static void
35251 ix86_init_tm_builtins (void)
35252 {
35253 enum ix86_builtin_func_type ftype;
35254 const struct builtin_description *d;
35255 size_t i;
35256 tree decl;
35257 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
35258 tree attrs_log, attrs_type_log;
35259
35260 if (!flag_tm)
35261 return;
35262
35263 /* If there are no builtins defined, we must be compiling in a
35264 language without trans-mem support. */
35265 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
35266 return;
35267
35268 /* Use whatever attributes a normal TM load has. */
35269 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
35270 attrs_load = DECL_ATTRIBUTES (decl);
35271 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35272 /* Use whatever attributes a normal TM store has. */
35273 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
35274 attrs_store = DECL_ATTRIBUTES (decl);
35275 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35276 /* Use whatever attributes a normal TM log has. */
35277 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
35278 attrs_log = DECL_ATTRIBUTES (decl);
35279 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35280
35281 for (i = 0, d = bdesc_tm;
35282 i < ARRAY_SIZE (bdesc_tm);
35283 i++, d++)
35284 {
35285 if ((d->mask & ix86_isa_flags) != 0
35286 || (lang_hooks.builtin_function
35287 == lang_hooks.builtin_function_ext_scope))
35288 {
35289 tree type, attrs, attrs_type;
35290 enum built_in_function code = (enum built_in_function) d->code;
35291
35292 ftype = (enum ix86_builtin_func_type) d->flag;
35293 type = ix86_get_builtin_func_type (ftype);
35294
35295 if (BUILTIN_TM_LOAD_P (code))
35296 {
35297 attrs = attrs_load;
35298 attrs_type = attrs_type_load;
35299 }
35300 else if (BUILTIN_TM_STORE_P (code))
35301 {
35302 attrs = attrs_store;
35303 attrs_type = attrs_type_store;
35304 }
35305 else
35306 {
35307 attrs = attrs_log;
35308 attrs_type = attrs_type_log;
35309 }
35310 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
35311 /* The builtin without the prefix for
35312 calling it directly. */
35313 d->name + strlen ("__builtin_"),
35314 attrs);
35315 /* add_builtin_function () will set the DECL_ATTRIBUTES; now
35316 set the TYPE_ATTRIBUTES. */
35317 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
35318
35319 set_builtin_decl (code, decl, false);
35320 }
35321 }
35322 }
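/* Illustrative effect of the loop above (a hypothetical expansion shown
   only to clarify the name handling): for the V4SF store entry the call
   is roughly

     add_builtin_function ("__builtin__ITM_WM128", type,
                           BUILT_IN_TM_STORE_M128, BUILT_IN_NORMAL,
                           "_ITM_WM128", attrs);

   where "_ITM_WM128" is d->name with the "__builtin_" prefix stripped,
   i.e. the libitm entry point, so each TM vector builtin is also
   callable directly under its plain _ITM_* name.  */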
35323
35324 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
35325 not in the current target ISA, so that the user can compile particular
35326 modules with target-specific options that differ from the command-line
35327 options. */
35328 static void
35329 ix86_init_mmx_sse_builtins (void)
35330 {
35331 const struct builtin_description * d;
35332 enum ix86_builtin_func_type ftype;
35333 size_t i;
35334
35335 /* Add all special builtins with variable number of operands. */
35336 for (i = 0, d = bdesc_special_args;
35337 i < ARRAY_SIZE (bdesc_special_args);
35338 i++, d++)
35339 {
35340 if (d->name == 0)
35341 continue;
35342
35343 ftype = (enum ix86_builtin_func_type) d->flag;
35344 def_builtin (d->mask, d->name, ftype, d->code);
35345 }
35346
35347 /* Add all builtins with variable number of operands. */
35348 for (i = 0, d = bdesc_args;
35349 i < ARRAY_SIZE (bdesc_args);
35350 i++, d++)
35351 {
35352 if (d->name == 0)
35353 continue;
35354
35355 ftype = (enum ix86_builtin_func_type) d->flag;
35356 def_builtin_const (d->mask, d->name, ftype, d->code);
35357 }
35358
35359 /* Add all builtins with rounding. */
35360 for (i = 0, d = bdesc_round_args;
35361 i < ARRAY_SIZE (bdesc_round_args);
35362 i++, d++)
35363 {
35364 if (d->name == 0)
35365 continue;
35366
35367 ftype = (enum ix86_builtin_func_type) d->flag;
35368 def_builtin_const (d->mask, d->name, ftype, d->code);
35369 }
35370
35371 /* pcmpestr[im] insns. */
35372 for (i = 0, d = bdesc_pcmpestr;
35373 i < ARRAY_SIZE (bdesc_pcmpestr);
35374 i++, d++)
35375 {
35376 if (d->code == IX86_BUILTIN_PCMPESTRM128)
35377 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
35378 else
35379 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
35380 def_builtin_const (d->mask, d->name, ftype, d->code);
35381 }
35382
35383 /* pcmpistr[im] insns. */
35384 for (i = 0, d = bdesc_pcmpistr;
35385 i < ARRAY_SIZE (bdesc_pcmpistr);
35386 i++, d++)
35387 {
35388 if (d->code == IX86_BUILTIN_PCMPISTRM128)
35389 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
35390 else
35391 ftype = INT_FTYPE_V16QI_V16QI_INT;
35392 def_builtin_const (d->mask, d->name, ftype, d->code);
35393 }
35394
35395 /* comi/ucomi insns. */
35396 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
35397 {
35398 if (d->mask == OPTION_MASK_ISA_SSE2)
35399 ftype = INT_FTYPE_V2DF_V2DF;
35400 else
35401 ftype = INT_FTYPE_V4SF_V4SF;
35402 def_builtin_const (d->mask, d->name, ftype, d->code);
35403 }
35404
35405 /* SSE */
35406 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
35407 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
35408 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
35409 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
35410
35411 /* SSE or 3DNow!A */
35412 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35413 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
35414 IX86_BUILTIN_MASKMOVQ);
35415
35416 /* SSE2 */
35417 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
35418 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
35419
35420 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
35421 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
35422 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
35423 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
35424
35425 /* SSE3. */
35426 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
35427 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
35428 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
35429 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
35430
35431 /* AES */
35432 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
35433 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
35434 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
35435 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
35436 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
35437 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
35438 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
35439 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
35440 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
35441 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
35442 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
35443 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
35444
35445 /* PCLMUL */
35446 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
35447 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
35448
35449 /* RDRND */
35450 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
35451 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
35452 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
35453 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
35454 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
35455 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
35456 IX86_BUILTIN_RDRAND64_STEP);
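/* Usage sketch (illustrative, not part of this file): each *_step
   builtin stores a random value through its pointer argument and
   returns the RDRAND carry flag, so callers retry until the hardware
   reports success, e.g.

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       ;

   which is essentially what the _rdrand32_step wrapper in immintrin.h
   boils down to.  */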
35457
35458 /* AVX2 */
35459 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
35460 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
35461 IX86_BUILTIN_GATHERSIV2DF);
35462
35463 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
35464 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
35465 IX86_BUILTIN_GATHERSIV4DF);
35466
35467 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
35468 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
35469 IX86_BUILTIN_GATHERDIV2DF);
35470
35471 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
35472 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
35473 IX86_BUILTIN_GATHERDIV4DF);
35474
35475 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
35476 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
35477 IX86_BUILTIN_GATHERSIV4SF);
35478
35479 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
35480 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
35481 IX86_BUILTIN_GATHERSIV8SF);
35482
35483 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
35484 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
35485 IX86_BUILTIN_GATHERDIV4SF);
35486
35487 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
35488 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
35489 IX86_BUILTIN_GATHERDIV8SF);
35490
35491 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
35492 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
35493 IX86_BUILTIN_GATHERSIV2DI);
35494
35495 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
35496 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
35497 IX86_BUILTIN_GATHERSIV4DI);
35498
35499 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
35500 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
35501 IX86_BUILTIN_GATHERDIV2DI);
35502
35503 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
35504 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
35505 IX86_BUILTIN_GATHERDIV4DI);
35506
35507 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
35508 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
35509 IX86_BUILTIN_GATHERSIV4SI);
35510
35511 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
35512 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
35513 IX86_BUILTIN_GATHERSIV8SI);
35514
35515 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
35516 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
35517 IX86_BUILTIN_GATHERDIV4SI);
35518
35519 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
35520 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
35521 IX86_BUILTIN_GATHERDIV8SI);
35522
35523 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
35524 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
35525 IX86_BUILTIN_GATHERALTSIV4DF);
35526
35527 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
35528 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
35529 IX86_BUILTIN_GATHERALTDIV8SF);
35530
35531 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
35532 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
35533 IX86_BUILTIN_GATHERALTSIV4DI);
35534
35535 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
35536 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
35537 IX86_BUILTIN_GATHERALTDIV8SI);
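/* Usage sketch (illustrative): the AVX2 gather builtins above take an
   explicit pass-through source, base pointer, index vector, mask and
   scale, e.g. for __builtin_ia32_gathersiv4df

     res = __builtin_ia32_gathersiv4df (src, base, idx, mask, 8);

   where src and mask are V4DF values, base is a const double * and idx
   is a V4SI index vector.  The gather intrinsics in avx2intrin.h wrap
   these builtins, passing an all-ones mask for their unmasked forms;
   the scale operand must be a compile-time constant (1, 2, 4 or 8).  */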
35538
35539 /* AVX512F */
35540 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
35541 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
35542 IX86_BUILTIN_GATHER3SIV16SF);
35543
35544 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
35545 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
35546 IX86_BUILTIN_GATHER3SIV8DF);
35547
35548 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
35549 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
35550 IX86_BUILTIN_GATHER3DIV16SF);
35551
35552 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
35553 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
35554 IX86_BUILTIN_GATHER3DIV8DF);
35555
35556 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
35557 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
35558 IX86_BUILTIN_GATHER3SIV16SI);
35559
35560 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
35561 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
35562 IX86_BUILTIN_GATHER3SIV8DI);
35563
35564 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
35565 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
35566 IX86_BUILTIN_GATHER3DIV16SI);
35567
35568 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
35569 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
35570 IX86_BUILTIN_GATHER3DIV8DI);
35571
35572 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
35573 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
35574 IX86_BUILTIN_GATHER3ALTSIV8DF);
35575
35576 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
35577 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
35578 IX86_BUILTIN_GATHER3ALTDIV16SF);
35579
35580 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
35581 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
35582 IX86_BUILTIN_GATHER3ALTSIV8DI);
35583
35584 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
35585 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
35586 IX86_BUILTIN_GATHER3ALTDIV16SI);
35587
35588 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
35589 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
35590 IX86_BUILTIN_SCATTERSIV16SF);
35591
35592 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
35593 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
35594 IX86_BUILTIN_SCATTERSIV8DF);
35595
35596 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
35597 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
35598 IX86_BUILTIN_SCATTERDIV16SF);
35599
35600 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
35601 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
35602 IX86_BUILTIN_SCATTERDIV8DF);
35603
35604 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
35605 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
35606 IX86_BUILTIN_SCATTERSIV16SI);
35607
35608 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
35609 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
35610 IX86_BUILTIN_SCATTERSIV8DI);
35611
35612 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
35613 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
35614 IX86_BUILTIN_SCATTERDIV16SI);
35615
35616 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
35617 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
35618 IX86_BUILTIN_SCATTERDIV8DI);
35619
35620 /* AVX512VL */
35621 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
35622 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
35623 IX86_BUILTIN_GATHER3SIV2DF);
35624
35625 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
35626 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
35627 IX86_BUILTIN_GATHER3SIV4DF);
35628
35629 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
35630 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
35631 IX86_BUILTIN_GATHER3DIV2DF);
35632
35633 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
35634 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
35635 IX86_BUILTIN_GATHER3DIV4DF);
35636
35637 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
35638 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
35639 IX86_BUILTIN_GATHER3SIV4SF);
35640
35641 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
35642 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
35643 IX86_BUILTIN_GATHER3SIV8SF);
35644
35645 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
35646 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
35647 IX86_BUILTIN_GATHER3DIV4SF);
35648
35649 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
35650 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
35651 IX86_BUILTIN_GATHER3DIV8SF);
35652
35653 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
35654 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
35655 IX86_BUILTIN_GATHER3SIV2DI);
35656
35657 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
35658 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
35659 IX86_BUILTIN_GATHER3SIV4DI);
35660
35661 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
35662 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
35663 IX86_BUILTIN_GATHER3DIV2DI);
35664
35665 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
35666 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
35667 IX86_BUILTIN_GATHER3DIV4DI);
35668
35669 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
35670 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
35671 IX86_BUILTIN_GATHER3SIV4SI);
35672
35673 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
35674 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
35675 IX86_BUILTIN_GATHER3SIV8SI);
35676
35677 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
35678 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
35679 IX86_BUILTIN_GATHER3DIV4SI);
35680
35681 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
35682 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
35683 IX86_BUILTIN_GATHER3DIV8SI);
35684
35685 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
35686 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
35687 IX86_BUILTIN_GATHER3ALTSIV4DF);
35688
35689 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
35690 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
35691 IX86_BUILTIN_GATHER3ALTDIV8SF);
35692
35693 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
35694 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
35695 IX86_BUILTIN_GATHER3ALTSIV4DI);
35696
35697 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
35698 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
35699 IX86_BUILTIN_GATHER3ALTDIV8SI);
35700
35701 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
35702 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
35703 IX86_BUILTIN_SCATTERSIV8SF);
35704
35705 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
35706 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
35707 IX86_BUILTIN_SCATTERSIV4SF);
35708
35709 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
35710 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
35711 IX86_BUILTIN_SCATTERSIV4DF);
35712
35713 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
35714 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
35715 IX86_BUILTIN_SCATTERSIV2DF);
35716
35717 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
35718 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
35719 IX86_BUILTIN_SCATTERDIV8SF);
35720
35721 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
35722 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
35723 IX86_BUILTIN_SCATTERDIV4SF);
35724
35725 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
35726 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
35727 IX86_BUILTIN_SCATTERDIV4DF);
35728
35729 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
35730 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
35731 IX86_BUILTIN_SCATTERDIV2DF);
35732
35733 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
35734 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
35735 IX86_BUILTIN_SCATTERSIV8SI);
35736
35737 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
35738 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
35739 IX86_BUILTIN_SCATTERSIV4SI);
35740
35741 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
35742 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
35743 IX86_BUILTIN_SCATTERSIV4DI);
35744
35745 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
35746 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
35747 IX86_BUILTIN_SCATTERSIV2DI);
35748
35749 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
35750 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
35751 IX86_BUILTIN_SCATTERDIV8SI);
35752
35753 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
35754 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
35755 IX86_BUILTIN_SCATTERDIV4SI);
35756
35757 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
35758 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
35759 IX86_BUILTIN_SCATTERDIV4DI);
35760
35761 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
35762 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
35763 IX86_BUILTIN_SCATTERDIV2DI);
35764 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
35765 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
35766 IX86_BUILTIN_SCATTERALTSIV8DF);
35767
35768 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
35769 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
35770 IX86_BUILTIN_SCATTERALTDIV16SF);
35771
35772 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
35773 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
35774 IX86_BUILTIN_SCATTERALTSIV8DI);
35775
35776 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
35777 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
35778 IX86_BUILTIN_SCATTERALTDIV16SI);
35779
35780 /* AVX512PF */
35781 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
35782 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35783 IX86_BUILTIN_GATHERPFDPD);
35784 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
35785 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35786 IX86_BUILTIN_GATHERPFDPS);
35787 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
35788 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35789 IX86_BUILTIN_GATHERPFQPD);
35790 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
35791 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35792 IX86_BUILTIN_GATHERPFQPS);
35793 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
35794 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35795 IX86_BUILTIN_SCATTERPFDPD);
35796 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
35797 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35798 IX86_BUILTIN_SCATTERPFDPS);
35799 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
35800 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35801 IX86_BUILTIN_SCATTERPFQPD);
35802 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
35803 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35804 IX86_BUILTIN_SCATTERPFQPS);
35805
35806 /* SHA */
35807 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
35808 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
35809 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
35810 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
35811 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
35812 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
35813 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
35814 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
35815 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
35816 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
35817 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
35818 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
35819 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
35820 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
35821
35822 /* RTM. */
35823 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
35824 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
35825
35826 /* MMX access to the vec_init patterns. */
35827 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
35828 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
35829
35830 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
35831 V4HI_FTYPE_HI_HI_HI_HI,
35832 IX86_BUILTIN_VEC_INIT_V4HI);
35833
35834 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
35835 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
35836 IX86_BUILTIN_VEC_INIT_V8QI);
35837
35838 /* Access to the vec_extract patterns. */
35839 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
35840 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
35841 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
35842 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
35843 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
35844 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
35845 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
35846 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
35847 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
35848 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
35849
35850 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35851 "__builtin_ia32_vec_ext_v4hi",
35852 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
35853
35854 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
35855 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
35856
35857 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
35858 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
35859
35860 /* Access to the vec_set patterns. */
35861 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
35862 "__builtin_ia32_vec_set_v2di",
35863 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
35864
35865 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
35866 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
35867
35868 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
35869 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
35870
35871 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
35872 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
35873
35874 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35875 "__builtin_ia32_vec_set_v4hi",
35876 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
35877
35878 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
35879 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
35880
35881 /* RDSEED */
35882 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
35883 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
35884 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
35885 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
35886 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
35887 "__builtin_ia32_rdseed_di_step",
35888 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
35889
35890 /* ADCX */
35891 def_builtin (0, "__builtin_ia32_addcarryx_u32",
35892 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
35893 def_builtin (OPTION_MASK_ISA_64BIT,
35894 "__builtin_ia32_addcarryx_u64",
35895 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35896 IX86_BUILTIN_ADDCARRYX64);
35897
35898 /* SBB */
35899 def_builtin (0, "__builtin_ia32_sbb_u32",
35900 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
35901 def_builtin (OPTION_MASK_ISA_64BIT,
35902 "__builtin_ia32_sbb_u64",
35903 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35904 IX86_BUILTIN_SBB64);
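/* Usage sketch (illustrative): both the addcarryx and sbb builtins take
   a carry-in, two operands and a pointer for the data result, and
   return the carry-out as an unsigned char, so multi-word arithmetic
   chains the flag explicitly, e.g. for a two-word add with illustrative
   operands a0/a1 and b0/b1:

     unsigned int lo, hi;
     unsigned char c;
     c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
     c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);
*/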
35905
35906 /* Read/write FLAGS. */
35907 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
35908 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35909 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
35910 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35911 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
35912 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
35913 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
35914 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
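/* Usage sketch (illustrative): the read/write pairs give direct access
   to the flags register in the width of the current target, e.g. on a
   64-bit target

     unsigned long long f = __builtin_ia32_readeflags_u64 ();
     __builtin_ia32_writeeflags_u64 (f | 0x1);

   roughly sets the carry flag while restoring the remaining writable
   flags; these builtins back the __readeflags/__writeeflags helpers in
   ia32intrin.h.  */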
35915
35916 /* CLFLUSHOPT. */
35917 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
35918 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
35919
35920 /* CLWB. */
35921 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
35922 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
35923
35924 /* MONITORX and MWAITX. */
35925 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
35926 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
35927 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
35928 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
35929
35930 /* CLZERO. */
35931 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
35932 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
35933
35934 /* Add FMA4/XOP multi-arg instructions. */
35935 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
35936 {
35937 if (d->name == 0)
35938 continue;
35939
35940 ftype = (enum ix86_builtin_func_type) d->flag;
35941 def_builtin_const (d->mask, d->name, ftype, d->code);
35942 }
35943 }
35944
35945 static void
35946 ix86_init_mpx_builtins ()
35947 {
35948 const struct builtin_description * d;
35949 enum ix86_builtin_func_type ftype;
35950 tree decl;
35951 size_t i;
35952
35953 for (i = 0, d = bdesc_mpx;
35954 i < ARRAY_SIZE (bdesc_mpx);
35955 i++, d++)
35956 {
35957 if (d->name == 0)
35958 continue;
35959
35960 ftype = (enum ix86_builtin_func_type) d->flag;
35961 decl = def_builtin (d->mask, d->name, ftype, d->code);
35962
35963 /* Without the leaf and nothrow flags, calls to MPX builtins
35964 may be followed by abnormal edges when setjmp is present
35965 in the function. Since there may be many MPX builtin
35966 calls, this causes lots of useless edges and enormous
35967 PHI nodes. To avoid this we mark MPX builtins as leaf
35968 and nothrow. */
35969 if (decl)
35970 {
35971 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
35972 NULL_TREE);
35973 TREE_NOTHROW (decl) = 1;
35974 }
35975 else
35976 {
35977 ix86_builtins_isa[(int)d->code].leaf_p = true;
35978 ix86_builtins_isa[(int)d->code].nothrow_p = true;
35979 }
35980 }
35981
35982 for (i = 0, d = bdesc_mpx_const;
35983 i < ARRAY_SIZE (bdesc_mpx_const);
35984 i++, d++)
35985 {
35986 if (d->name == 0)
35987 continue;
35988
35989 ftype = (enum ix86_builtin_func_type) d->flag;
35990 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
35991
35992 if (decl)
35993 {
35994 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
35995 NULL_TREE);
35996 TREE_NOTHROW (decl) = 1;
35997 }
35998 else
35999 {
36000 ix86_builtins_isa[(int)d->code].leaf_p = true;
36001 ix86_builtins_isa[(int)d->code].nothrow_p = true;
36002 }
36003 }
36004 }
36005
36006 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
36007 to return a pointer to VERSION_DECL if the outcome of the expression
36008 formed by PREDICATE_CHAIN is true. This function will be called during
36009 version dispatch to decide which function version to execute. It returns
36010 the basic block at the end, to which more conditions can be added. */
36011
36012 static basic_block
36013 add_condition_to_bb (tree function_decl, tree version_decl,
36014 tree predicate_chain, basic_block new_bb)
36015 {
36016 gimple *return_stmt;
36017 tree convert_expr, result_var;
36018 gimple *convert_stmt;
36019 gimple *call_cond_stmt;
36020 gimple *if_else_stmt;
36021
36022 basic_block bb1, bb2, bb3;
36023 edge e12, e23;
36024
36025 tree cond_var, and_expr_var = NULL_TREE;
36026 gimple_seq gseq;
36027
36028 tree predicate_decl, predicate_arg;
36029
36030 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36031
36032 gcc_assert (new_bb != NULL);
36033 gseq = bb_seq (new_bb);
36034
36035
36036 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36037 build_fold_addr_expr (version_decl));
36038 result_var = create_tmp_var (ptr_type_node);
36039 convert_stmt = gimple_build_assign (result_var, convert_expr);
36040 return_stmt = gimple_build_return (result_var);
36041
36042 if (predicate_chain == NULL_TREE)
36043 {
36044 gimple_seq_add_stmt (&gseq, convert_stmt);
36045 gimple_seq_add_stmt (&gseq, return_stmt);
36046 set_bb_seq (new_bb, gseq);
36047 gimple_set_bb (convert_stmt, new_bb);
36048 gimple_set_bb (return_stmt, new_bb);
36049 pop_cfun ();
36050 return new_bb;
36051 }
36052
36053 while (predicate_chain != NULL)
36054 {
36055 cond_var = create_tmp_var (integer_type_node);
36056 predicate_decl = TREE_PURPOSE (predicate_chain);
36057 predicate_arg = TREE_VALUE (predicate_chain);
36058 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36059 gimple_call_set_lhs (call_cond_stmt, cond_var);
36060
36061 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36062 gimple_set_bb (call_cond_stmt, new_bb);
36063 gimple_seq_add_stmt (&gseq, call_cond_stmt);
36064
36065 predicate_chain = TREE_CHAIN (predicate_chain);
36066
36067 if (and_expr_var == NULL)
36068 and_expr_var = cond_var;
36069 else
36070 {
36071 gimple *assign_stmt;
36072 /* Use MIN_EXPR to check whether any of the integers is zero:
36073 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
36074 assign_stmt = gimple_build_assign (and_expr_var,
36075 build2 (MIN_EXPR, integer_type_node,
36076 cond_var, and_expr_var));
36077
36078 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
36079 gimple_set_bb (assign_stmt, new_bb);
36080 gimple_seq_add_stmt (&gseq, assign_stmt);
36081 }
36082 }
36083
36084 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
36085 integer_zero_node,
36086 NULL_TREE, NULL_TREE);
36087 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
36088 gimple_set_bb (if_else_stmt, new_bb);
36089 gimple_seq_add_stmt (&gseq, if_else_stmt);
36090
36091 gimple_seq_add_stmt (&gseq, convert_stmt);
36092 gimple_seq_add_stmt (&gseq, return_stmt);
36093 set_bb_seq (new_bb, gseq);
36094
36095 bb1 = new_bb;
36096 e12 = split_block (bb1, if_else_stmt);
36097 bb2 = e12->dest;
36098 e12->flags &= ~EDGE_FALLTHRU;
36099 e12->flags |= EDGE_TRUE_VALUE;
36100
36101 e23 = split_block (bb2, return_stmt);
36102
36103 gimple_set_bb (convert_stmt, bb2);
36104 gimple_set_bb (return_stmt, bb2);
36105
36106 bb3 = e23->dest;
36107 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
36108
36109 remove_edge (e23);
36110 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
36111
36112 pop_cfun ();
36113
36114 return bb3;
36115 }
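/* Rough shape of what the function above appends for one version (an
   informal sketch with hypothetical version names, not literal GIMPLE
   dump output):

     cond_1 = __builtin_cpu_is ("core2");
     cond_2 = __builtin_cpu_supports ("ssse3");
     cond_1 = MIN_EXPR <cond_2, cond_1>;
     if (cond_1 > 0)
       return (void *) &foo.core2_version;

   The false edge of the condition leads to the returned basic block,
   where the next call to this function appends the check for the next
   version in priority order.  */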
36116
36117 /* This parses the attribute arguments to target in DECL and determines
36118 the right builtin to use to match the platform specification.
36119 It returns the priority value for this version decl. If PREDICATE_LIST
36120 is not NULL, it stores the list of cpu features that need to be checked
36121 before dispatching this function. */
36122
36123 static unsigned int
36124 get_builtin_code_for_version (tree decl, tree *predicate_list)
36125 {
36126 tree attrs;
36127 struct cl_target_option cur_target;
36128 tree target_node;
36129 struct cl_target_option *new_target;
36130 const char *arg_str = NULL;
36131 const char *attrs_str = NULL;
36132 char *tok_str = NULL;
36133 char *token;
36134
36135 /* Priority of i386 features; a greater value means a higher priority. This is
36136 used to decide the order in which function dispatch must happen. For
36137 instance, a version specialized for SSE4.2 should be checked for dispatch
36138 before a version for SSE3, as SSE4.2 implies SSE3. */
36139 enum feature_priority
36140 {
36141 P_ZERO = 0,
36142 P_MMX,
36143 P_SSE,
36144 P_SSE2,
36145 P_SSE3,
36146 P_SSSE3,
36147 P_PROC_SSSE3,
36148 P_SSE4_A,
36149 P_PROC_SSE4_A,
36150 P_SSE4_1,
36151 P_SSE4_2,
36152 P_PROC_SSE4_2,
36153 P_POPCNT,
36154 P_AES,
36155 P_PCLMUL,
36156 P_AVX,
36157 P_PROC_AVX,
36158 P_BMI,
36159 P_PROC_BMI,
36160 P_FMA4,
36161 P_XOP,
36162 P_PROC_XOP,
36163 P_FMA,
36164 P_PROC_FMA,
36165 P_BMI2,
36166 P_AVX2,
36167 P_PROC_AVX2,
36168 P_AVX512F,
36169 P_PROC_AVX512F
36170 };
36171
36172 enum feature_priority priority = P_ZERO;
36173
36174 /* These are the target attribute strings for which a dispatcher is
36175 available, from fold_builtin_cpu. */
36176
36177 static struct _feature_list
36178 {
36179 const char *const name;
36180 const enum feature_priority priority;
36181 }
36182 const feature_list[] =
36183 {
36184 {"mmx", P_MMX},
36185 {"sse", P_SSE},
36186 {"sse2", P_SSE2},
36187 {"sse3", P_SSE3},
36188 {"sse4a", P_SSE4_A},
36189 {"ssse3", P_SSSE3},
36190 {"sse4.1", P_SSE4_1},
36191 {"sse4.2", P_SSE4_2},
36192 {"popcnt", P_POPCNT},
36193 {"aes", P_AES},
36194 {"pclmul", P_PCLMUL},
36195 {"avx", P_AVX},
36196 {"bmi", P_BMI},
36197 {"fma4", P_FMA4},
36198 {"xop", P_XOP},
36199 {"fma", P_FMA},
36200 {"bmi2", P_BMI2},
36201 {"avx2", P_AVX2},
36202 {"avx512f", P_AVX512F}
36203 };
36204
36205
36206 static unsigned int NUM_FEATURES
36207 = sizeof (feature_list) / sizeof (struct _feature_list);
36208
36209 unsigned int i;
36210
36211 tree predicate_chain = NULL_TREE;
36212 tree predicate_decl, predicate_arg;
36213
36214 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36215 gcc_assert (attrs != NULL);
36216
36217 attrs = TREE_VALUE (TREE_VALUE (attrs));
36218
36219 gcc_assert (TREE_CODE (attrs) == STRING_CST);
36220 attrs_str = TREE_STRING_POINTER (attrs);
36221
36222 /* Return priority zero for default function. */
36223 if (strcmp (attrs_str, "default") == 0)
36224 return 0;
36225
36226 /* Handle arch= if specified. For priority, set it to be 1 more than
36227 the best instruction set the processor can handle. For instance, if
36228 there is a version for atom and a version for ssse3 (the highest ISA
36229 priority for atom), the atom version must be checked for dispatch
36230 before the ssse3 version. */
36231 if (strstr (attrs_str, "arch=") != NULL)
36232 {
36233 cl_target_option_save (&cur_target, &global_options);
36234 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
36235 &global_options_set);
36236
36237 gcc_assert (target_node);
36238 new_target = TREE_TARGET_OPTION (target_node);
36239 gcc_assert (new_target);
36240
36241 if (new_target->arch_specified && new_target->arch > 0)
36242 {
36243 switch (new_target->arch)
36244 {
36245 case PROCESSOR_CORE2:
36246 arg_str = "core2";
36247 priority = P_PROC_SSSE3;
36248 break;
36249 case PROCESSOR_NEHALEM:
36250 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
36251 arg_str = "westmere";
36252 else
36253 /* We translate "arch=corei7" and "arch=nehalem" to
36254 "corei7" so that it will be mapped to M_INTEL_COREI7
36255 as cpu type to cover all M_INTEL_COREI7_XXXs. */
36256 arg_str = "corei7";
36257 priority = P_PROC_SSE4_2;
36258 break;
36259 case PROCESSOR_SANDYBRIDGE:
36260 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
36261 arg_str = "ivybridge";
36262 else
36263 arg_str = "sandybridge";
36264 priority = P_PROC_AVX;
36265 break;
36266 case PROCESSOR_HASWELL:
36267 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
36268 arg_str = "skylake-avx512";
36269 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
36270 arg_str = "skylake";
36271 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
36272 arg_str = "broadwell";
36273 else
36274 arg_str = "haswell";
36275 priority = P_PROC_AVX2;
36276 break;
36277 case PROCESSOR_BONNELL:
36278 arg_str = "bonnell";
36279 priority = P_PROC_SSSE3;
36280 break;
36281 case PROCESSOR_KNL:
36282 arg_str = "knl";
36283 priority = P_PROC_AVX512F;
36284 break;
36285 case PROCESSOR_SILVERMONT:
36286 arg_str = "silvermont";
36287 priority = P_PROC_SSE4_2;
36288 break;
36289 case PROCESSOR_AMDFAM10:
36290 arg_str = "amdfam10h";
36291 priority = P_PROC_SSE4_A;
36292 break;
36293 case PROCESSOR_BTVER1:
36294 arg_str = "btver1";
36295 priority = P_PROC_SSE4_A;
36296 break;
36297 case PROCESSOR_BTVER2:
36298 arg_str = "btver2";
36299 priority = P_PROC_BMI;
36300 break;
36301 case PROCESSOR_BDVER1:
36302 arg_str = "bdver1";
36303 priority = P_PROC_XOP;
36304 break;
36305 case PROCESSOR_BDVER2:
36306 arg_str = "bdver2";
36307 priority = P_PROC_FMA;
36308 break;
36309 case PROCESSOR_BDVER3:
36310 arg_str = "bdver3";
36311 priority = P_PROC_FMA;
36312 break;
36313 case PROCESSOR_BDVER4:
36314 arg_str = "bdver4";
36315 priority = P_PROC_AVX2;
36316 break;
36317 case PROCESSOR_ZNVER1:
36318 arg_str = "znver1";
36319 priority = P_PROC_AVX2;
36320 break;
36321 }
36322 }
36323
36324 cl_target_option_restore (&global_options, &cur_target);
36325
36326 if (predicate_list && arg_str == NULL)
36327 {
36328 error_at (DECL_SOURCE_LOCATION (decl),
36329 "No dispatcher found for the versioning attributes");
36330 return 0;
36331 }
36332
36333 if (predicate_list)
36334 {
36335 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
36336 /* For a C string literal the length includes the trailing NULL. */
36337 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
36338 predicate_chain = tree_cons (predicate_decl, predicate_arg,
36339 predicate_chain);
36340 }
36341 }
36342
36343 /* Process feature name. */
36344 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
36345 strcpy (tok_str, attrs_str);
36346 token = strtok (tok_str, ",");
36347 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
36348
36349 while (token != NULL)
36350 {
36351 /* Do not process "arch=" */
36352 if (strncmp (token, "arch=", 5) == 0)
36353 {
36354 token = strtok (NULL, ",");
36355 continue;
36356 }
36357 for (i = 0; i < NUM_FEATURES; ++i)
36358 {
36359 if (strcmp (token, feature_list[i].name) == 0)
36360 {
36361 if (predicate_list)
36362 {
36363 predicate_arg = build_string_literal (
36364 strlen (feature_list[i].name) + 1,
36365 feature_list[i].name);
36366 predicate_chain = tree_cons (predicate_decl, predicate_arg,
36367 predicate_chain);
36368 }
36369 /* Find the maximum priority feature. */
36370 if (feature_list[i].priority > priority)
36371 priority = feature_list[i].priority;
36372
36373 break;
36374 }
36375 }
36376 if (predicate_list && i == NUM_FEATURES)
36377 {
36378 error_at (DECL_SOURCE_LOCATION (decl),
36379 "No dispatcher found for %s", token);
36380 return 0;
36381 }
36382 token = strtok (NULL, ",");
36383 }
36384 free (tok_str);
36385
36386 if (predicate_list && predicate_chain == NULL_TREE)
36387 {
36388 error_at (DECL_SOURCE_LOCATION (decl),
36389 "No dispatcher found for the versioning attributes : %s",
36390 attrs_str);
36391 return 0;
36392 }
36393 else if (predicate_list)
36394 {
36395 predicate_chain = nreverse (predicate_chain);
36396 *predicate_list = predicate_chain;
36397 }
36398
36399 return priority;
36400 }
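/* Worked example (hypothetical C++ function versions, listed only to
   illustrate the priority computation above):

     __attribute__((target ("arch=core2")))  int foo (void);    priority P_PROC_SSSE3
     __attribute__((target ("ssse3")))       int foo (void);    priority P_SSSE3
     __attribute__((target ("sse4.2,aes")))  int foo (void);    priority P_AES
     __attribute__((target ("default")))     int foo (void);    priority 0

   P_PROC_SSSE3 > P_SSSE3 and P_AES > P_SSE4_2, so the arch=core2
   version is checked before the plain ssse3 one, a feature version is
   ranked by its highest-priority feature, and the default version is
   always checked last.  */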
36401
36402 /* This compares the priority of target features in function DECL1
36403 and DECL2. It returns positive value if DECL1 is higher priority,
36404 negative value if DECL2 is higher priority and 0 if they are the
36405 same. */
36406
36407 static int
36408 ix86_compare_version_priority (tree decl1, tree decl2)
36409 {
36410 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
36411 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
36412
36413 return (int)priority1 - (int)priority2;
36414 }
36415
36416 /* V1 and V2 point to function versions with different priorities
36417 based on the target ISA. This function compares their priorities. */
36418
36419 static int
36420 feature_compare (const void *v1, const void *v2)
36421 {
36422 typedef struct _function_version_info
36423 {
36424 tree version_decl;
36425 tree predicate_chain;
36426 unsigned int dispatch_priority;
36427 } function_version_info;
36428
36429 const function_version_info c1 = *(const function_version_info *)v1;
36430 const function_version_info c2 = *(const function_version_info *)v2;
36431 return (c2.dispatch_priority - c1.dispatch_priority);
36432 }
36433
36434 /* This function generates the dispatch function for
36435 multi-versioned functions. DISPATCH_DECL is the function which will
36436 contain the dispatch logic. FNDECLS holds the function choices for
36437 dispatch, passed as a vector of decls. EMPTY_BB is the basic block pointer
36438 in DISPATCH_DECL in which the dispatch code is generated. */
36439
36440 static int
36441 dispatch_function_versions (tree dispatch_decl,
36442 void *fndecls_p,
36443 basic_block *empty_bb)
36444 {
36445 tree default_decl;
36446 gimple *ifunc_cpu_init_stmt;
36447 gimple_seq gseq;
36448 int ix;
36449 tree ele;
36450 vec<tree> *fndecls;
36451 unsigned int num_versions = 0;
36452 unsigned int actual_versions = 0;
36453 unsigned int i;
36454
36455 struct _function_version_info
36456 {
36457 tree version_decl;
36458 tree predicate_chain;
36459 unsigned int dispatch_priority;
36460 } *function_version_info;
36461
36462 gcc_assert (dispatch_decl != NULL
36463 && fndecls_p != NULL
36464 && empty_bb != NULL);
36465
36466 /* fndecls_p is actually a vector. */
36467 fndecls = static_cast<vec<tree> *> (fndecls_p);
36468
36469 /* At least one more version other than the default. */
36470 num_versions = fndecls->length ();
36471 gcc_assert (num_versions >= 2);
36472
36473 function_version_info = (struct _function_version_info *)
36474 XNEWVEC (struct _function_version_info, (num_versions - 1));
36475
36476 /* The first version in the vector is the default decl. */
36477 default_decl = (*fndecls)[0];
36478
36479 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
36480
36481 gseq = bb_seq (*empty_bb);
36482 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
36483 constructors, so explicitly call __builtin_cpu_init here. */
36484 ifunc_cpu_init_stmt = gimple_build_call_vec (
36485 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
36486 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
36487 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
36488 set_bb_seq (*empty_bb, gseq);
36489
36490 pop_cfun ();
36491
36492
36493 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
36494 {
36495 tree version_decl = ele;
36496 tree predicate_chain = NULL_TREE;
36497 unsigned int priority;
36498 /* Get attribute string, parse it and find the right predicate decl.
36499 The predicate function could be a lengthy combination of many
36500 features, like arch-type and various isa-variants. */
36501 priority = get_builtin_code_for_version (version_decl,
36502 &predicate_chain);
36503
36504 if (predicate_chain == NULL_TREE)
36505 continue;
36506
36507 function_version_info [actual_versions].version_decl = version_decl;
36508 function_version_info [actual_versions].predicate_chain
36509 = predicate_chain;
36510 function_version_info [actual_versions].dispatch_priority = priority;
36511 actual_versions++;
36512 }
36513
36514 /* Sort the versions according to descending order of dispatch priority. The
36515 priority is based on the ISA. This is not a perfect solution. There
36516 could still be ambiguity. If more than one function version is suitable
36517 to execute, which one should be dispatched? In future, allow the user
36518 to specify a dispatch priority next to the version. */
36519 qsort (function_version_info, actual_versions,
36520 sizeof (struct _function_version_info), feature_compare);
36521
36522 for (i = 0; i < actual_versions; ++i)
36523 *empty_bb = add_condition_to_bb (dispatch_decl,
36524 function_version_info[i].version_decl,
36525 function_version_info[i].predicate_chain,
36526 *empty_bb);
36527
36528 /* Dispatch the default version at the end. */
36529 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
36530 NULL, *empty_bb);
36531
36532 free (function_version_info);
36533 return 0;
36534 }
36535
36536 /* Comparator function used by the qsort routine to sort the attribute
36537 specification strings of "target". */
36538
36539 static int
36540 attr_strcmp (const void *v1, const void *v2)
36541 {
36542 const char *c1 = *(char *const*)v1;
36543 const char *c2 = *(char *const*)v2;
36544 return strcmp (c1, c2);
36545 }
36546
36547 /* ARGLIST is the argument to target attribute. This function tokenizes
36548 the comma separated arguments, sorts them and returns a string which
36549 is a unique identifier for the comma separated arguments. It also
36550 replaces non-identifier characters "=,-" with "_". */
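/* For example (illustrative), the attribute arguments "sse4.2,arch=slm"
   are first rewritten to "sse4.2,arch_slm" ('=' and '-' become '_'),
   then split at ',', sorted with strcmp, and rejoined with '_',
   yielding the identifier "arch_slm_sse4.2".  */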
36551
36552 static char *
36553 sorted_attr_string (tree arglist)
36554 {
36555 tree arg;
36556 size_t str_len_sum = 0;
36557 char **args = NULL;
36558 char *attr_str, *ret_str;
36559 char *attr = NULL;
36560 unsigned int argnum = 1;
36561 unsigned int i;
36562
36563 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36564 {
36565 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36566 size_t len = strlen (str);
36567 str_len_sum += len + 1;
36568 if (arg != arglist)
36569 argnum++;
36570 for (i = 0; i < strlen (str); i++)
36571 if (str[i] == ',')
36572 argnum++;
36573 }
36574
36575 attr_str = XNEWVEC (char, str_len_sum);
36576 str_len_sum = 0;
36577 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36578 {
36579 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36580 size_t len = strlen (str);
36581 memcpy (attr_str + str_len_sum, str, len);
36582 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
36583 str_len_sum += len + 1;
36584 }
36585
36586 /* Replace "=,-" with "_". */
36587 for (i = 0; i < strlen (attr_str); i++)
36588 if (attr_str[i] == '=' || attr_str[i]== '-')
36589 attr_str[i] = '_';
36590
36591 if (argnum == 1)
36592 return attr_str;
36593
36594 args = XNEWVEC (char *, argnum);
36595
36596 i = 0;
36597 attr = strtok (attr_str, ",");
36598 while (attr != NULL)
36599 {
36600 args[i] = attr;
36601 i++;
36602 attr = strtok (NULL, ",");
36603 }
36604
36605 qsort (args, argnum, sizeof (char *), attr_strcmp);
36606
36607 ret_str = XNEWVEC (char, str_len_sum);
36608 str_len_sum = 0;
36609 for (i = 0; i < argnum; i++)
36610 {
36611 size_t len = strlen (args[i]);
36612 memcpy (ret_str + str_len_sum, args[i], len);
36613 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
36614 str_len_sum += len + 1;
36615 }
36616
36617 XDELETEVEC (args);
36618 XDELETEVEC (attr_str);
36619 return ret_str;
36620 }
36621
36622 /* This function changes the assembler name for functions that are
36623 versions. If DECL is a function version and has a "target"
36624 attribute, it appends the attribute string to its assembler name. */
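/* For instance (illustrative), a version of foo carrying
   __attribute__ ((target ("arch=core2"))) gets "arch_core2" appended,
   giving the assembler name "foo.arch_core2" (or the C++ mangled name
   followed by ".arch_core2"), while the "default" version keeps its
   original assembler name.  */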
36625
36626 static tree
36627 ix86_mangle_function_version_assembler_name (tree decl, tree id)
36628 {
36629 tree version_attr;
36630 const char *orig_name, *version_string;
36631 char *attr_str, *assembler_name;
36632
36633 if (DECL_DECLARED_INLINE_P (decl)
36634 && lookup_attribute ("gnu_inline",
36635 DECL_ATTRIBUTES (decl)))
36636 error_at (DECL_SOURCE_LOCATION (decl),
36637 "Function versions cannot be marked as gnu_inline,"
36638 " bodies have to be generated");
36639
36640 if (DECL_VIRTUAL_P (decl)
36641 || DECL_VINDEX (decl))
36642 sorry ("Virtual function multiversioning not supported");
36643
36644 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36645
36646 /* The target attribute string cannot be NULL. */
36647 gcc_assert (version_attr != NULL_TREE);
36648
36649 orig_name = IDENTIFIER_POINTER (id);
36650 version_string
36651 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
36652
36653 if (strcmp (version_string, "default") == 0)
36654 return id;
36655
36656 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
36657 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
36658
36659 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
36660
36661 /* Allow assembler name to be modified if already set. */
36662 if (DECL_ASSEMBLER_NAME_SET_P (decl))
36663 SET_DECL_RTL (decl, NULL);
36664
36665 tree ret = get_identifier (assembler_name);
36666 XDELETEVEC (attr_str);
36667 XDELETEVEC (assembler_name);
36668 return ret;
36669 }
36670
36671 /* This function returns true if FN1 and FN2 are versions of the same function,
36672 that is, the target strings of the function decls are different. This assumes
36673 that FN1 and FN2 have the same signature. */
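/* Illustrative example: two declarations of foo carrying
   target ("sse4.2") and target ("avx") are distinct versions, whereas
   declarations whose sorted target strings compare equal (e.g.
   "avx,popcnt" and "popcnt,avx") are the same version and this hook
   returns false for them.  */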
36674
36675 static bool
36676 ix86_function_versions (tree fn1, tree fn2)
36677 {
36678 tree attr1, attr2;
36679 char *target1, *target2;
36680 bool result;
36681
36682 if (TREE_CODE (fn1) != FUNCTION_DECL
36683 || TREE_CODE (fn2) != FUNCTION_DECL)
36684 return false;
36685
36686 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
36687 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
36688
36689 /* At least one function decl should have the target attribute specified. */
36690 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
36691 return false;
36692
36693 /* Diagnose missing target attribute if one of the decls is already
36694 multi-versioned. */
36695 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
36696 {
36697 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
36698 {
36699 if (attr2 != NULL_TREE)
36700 {
36701 std::swap (fn1, fn2);
36702 attr1 = attr2;
36703 }
36704 error_at (DECL_SOURCE_LOCATION (fn2),
36705 "missing %<target%> attribute for multi-versioned %D",
36706 fn2);
36707 inform (DECL_SOURCE_LOCATION (fn1),
36708 "previous declaration of %D", fn1);
36709 /* Prevent diagnosing of the same error multiple times. */
36710 DECL_ATTRIBUTES (fn2)
36711 = tree_cons (get_identifier ("target"),
36712 copy_node (TREE_VALUE (attr1)),
36713 DECL_ATTRIBUTES (fn2));
36714 }
36715 return false;
36716 }
36717
36718 target1 = sorted_attr_string (TREE_VALUE (attr1));
36719 target2 = sorted_attr_string (TREE_VALUE (attr2));
36720
36721 /* The sorted target strings must be different for fn1 and fn2
36722 to be versions. */
36723 if (strcmp (target1, target2) == 0)
36724 result = false;
36725 else
36726 result = true;
36727
36728 XDELETEVEC (target1);
36729 XDELETEVEC (target2);
36730
36731 return result;
36732 }
36733
36734 static tree
36735 ix86_mangle_decl_assembler_name (tree decl, tree id)
36736 {
36737 /* For function version, add the target suffix to the assembler name. */
36738 if (TREE_CODE (decl) == FUNCTION_DECL
36739 && DECL_FUNCTION_VERSIONED (decl))
36740 id = ix86_mangle_function_version_assembler_name (decl, id);
36741 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
36742 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
36743 #endif
36744
36745 return id;
36746 }
36747
36748 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
36749 is true, append the full path name of the source file. */
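/* A sketch of the result (illustrative): for a decl whose assembler
   name is "foo", make_name (decl, "resolver", false) yields
   "foo.resolver", while passing true for MAKE_UNIQUE additionally
   splices in the unique name from get_file_function_name, yielding
   "foo.<unique>.resolver".  */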
36750
36751 static char *
36752 make_name (tree decl, const char *suffix, bool make_unique)
36753 {
36754 char *global_var_name;
36755 int name_len;
36756 const char *name;
36757 const char *unique_name = NULL;
36758
36759 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36760
36761 /* Get a unique name that can be used globally without any chances
36762 of collision at link time. */
36763 if (make_unique)
36764 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
36765
36766 name_len = strlen (name) + strlen (suffix) + 2;
36767
36768 if (make_unique)
36769 name_len += strlen (unique_name) + 1;
36770 global_var_name = XNEWVEC (char, name_len);
36771
36772 /* Use '.' to concatenate names as it is demangler friendly. */
36773 if (make_unique)
36774 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
36775 suffix);
36776 else
36777 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
36778
36779 return global_var_name;
36780 }
36781
36782 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36783
36784 /* Make a dispatcher declaration for the multi-versioned function DECL.
36785 Calls to the function DECL will be replaced with calls to the dispatcher
36786 by the front-end. Return the decl created. */
36787
36788 static tree
36789 make_dispatcher_decl (const tree decl)
36790 {
36791 tree func_decl;
36792 char *func_name;
36793 tree fn_type, func_type;
36794 bool is_uniq = false;
36795
36796 if (TREE_PUBLIC (decl) == 0)
36797 is_uniq = true;
36798
36799 func_name = make_name (decl, "ifunc", is_uniq);
36800
36801 fn_type = TREE_TYPE (decl);
36802 func_type = build_function_type (TREE_TYPE (fn_type),
36803 TYPE_ARG_TYPES (fn_type));
36804
36805 func_decl = build_fn_decl (func_name, func_type);
36806 XDELETEVEC (func_name);
36807 TREE_USED (func_decl) = 1;
36808 DECL_CONTEXT (func_decl) = NULL_TREE;
36809 DECL_INITIAL (func_decl) = error_mark_node;
36810 DECL_ARTIFICIAL (func_decl) = 1;
36811 /* Mark this func as external; the resolver will flip it again if
36812 it gets generated. */
36813 DECL_EXTERNAL (func_decl) = 1;
36814 /* IFUNCs have to be externally visible, so mark this decl public. */
36815 TREE_PUBLIC (func_decl) = 1;
36816
36817 return func_decl;
36818 }
36819
36820 #endif
36821
36822 /* Returns true if DECL is multi-versioned and is the default function,
36823 that is, it is not tagged with a target-specific optimization. */
36824
36825 static bool
36826 is_function_default_version (const tree decl)
36827 {
36828 if (TREE_CODE (decl) != FUNCTION_DECL
36829 || !DECL_FUNCTION_VERSIONED (decl))
36830 return false;
36831 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36832 gcc_assert (attr);
36833 attr = TREE_VALUE (TREE_VALUE (attr));
36834 return (TREE_CODE (attr) == STRING_CST
36835 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
36836 }
36837
36838 /* Make a dispatcher declaration for the multi-versioned function DECL.
36839 Calls to the function DECL will be replaced with calls to the dispatcher
36840 by the front-end. Returns the decl of the dispatcher function. */
36841
36842 static tree
36843 ix86_get_function_versions_dispatcher (void *decl)
36844 {
36845 tree fn = (tree) decl;
36846 struct cgraph_node *node = NULL;
36847 struct cgraph_node *default_node = NULL;
36848 struct cgraph_function_version_info *node_v = NULL;
36849 struct cgraph_function_version_info *first_v = NULL;
36850
36851 tree dispatch_decl = NULL;
36852
36853 struct cgraph_function_version_info *default_version_info = NULL;
36854
36855 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36856
36857 node = cgraph_node::get (fn);
36858 gcc_assert (node != NULL);
36859
36860 node_v = node->function_version ();
36861 gcc_assert (node_v != NULL);
36862
36863 if (node_v->dispatcher_resolver != NULL)
36864 return node_v->dispatcher_resolver;
36865
36866 /* Find the default version and make it the first node. */
36867 first_v = node_v;
36868 /* Go to the beginning of the chain. */
36869 while (first_v->prev != NULL)
36870 first_v = first_v->prev;
36871 default_version_info = first_v;
36872 while (default_version_info != NULL)
36873 {
36874 if (is_function_default_version
36875 (default_version_info->this_node->decl))
36876 break;
36877 default_version_info = default_version_info->next;
36878 }
36879
36880 /* If there is no default node, just return NULL. */
36881 if (default_version_info == NULL)
36882 return NULL;
36883
36884 /* Make default info the first node. */
36885 if (first_v != default_version_info)
36886 {
36887 default_version_info->prev->next = default_version_info->next;
36888 if (default_version_info->next)
36889 default_version_info->next->prev = default_version_info->prev;
36890 first_v->prev = default_version_info;
36891 default_version_info->next = first_v;
36892 default_version_info->prev = NULL;
36893 }
36894
36895 default_node = default_version_info->this_node;
36896
36897 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36898 if (targetm.has_ifunc_p ())
36899 {
36900 struct cgraph_function_version_info *it_v = NULL;
36901 struct cgraph_node *dispatcher_node = NULL;
36902 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36903
36904 /* Right now, the dispatching is done via ifunc. */
36905 dispatch_decl = make_dispatcher_decl (default_node->decl);
36906
36907 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36908 gcc_assert (dispatcher_node != NULL);
36909 dispatcher_node->dispatcher_function = 1;
36910 dispatcher_version_info
36911 = dispatcher_node->insert_new_function_version ();
36912 dispatcher_version_info->next = default_version_info;
36913 dispatcher_node->definition = 1;
36914
36915 /* Set the dispatcher for all the versions. */
36916 it_v = default_version_info;
36917 while (it_v != NULL)
36918 {
36919 it_v->dispatcher_resolver = dispatch_decl;
36920 it_v = it_v->next;
36921 }
36922 }
36923 else
36924 #endif
36925 {
36926 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36927 "multiversioning needs ifunc which is not supported "
36928 "on this target");
36929 }
36930
36931 return dispatch_decl;
36932 }
36933
36934 /* Make the resolver function decl to dispatch the versions of
36935 a multi-versioned function, DEFAULT_DECL. Create an
36936 empty basic block in the resolver and store the pointer in
36937 EMPTY_BB. Return the decl of the resolver function. */
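/* The generated resolver is roughly equivalent to the following
   (illustrative sketch; the real body is built in GIMPLE by
   dispatch_function_versions):

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (<predicate for the highest-priority version>)
         return foo_for_that_version;
       ...
       return foo_default;
     }
*/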
36938
36939 static tree
36940 make_resolver_func (const tree default_decl,
36941 const tree dispatch_decl,
36942 basic_block *empty_bb)
36943 {
36944 char *resolver_name;
36945 tree decl, type, decl_name, t;
36946 bool is_uniq = false;
36947
36948 /* IFUNCs have to be globally visible. So, if the default_decl is
36949 not, then the name of the IFUNC should be made unique. */
36950 if (TREE_PUBLIC (default_decl) == 0)
36951 is_uniq = true;
36952
36953 /* Append the filename to the resolver function if the versions are
36954 not externally visible. This is because the resolver function has
36955 to be externally visible for the loader to find it. So, appending
36956 the filename will prevent conflicts with a resolver function from
36957 another module which is based on the same version name. */
36958 resolver_name = make_name (default_decl, "resolver", is_uniq);
36959
36960 /* The resolver function should return a (void *). */
36961 type = build_function_type_list (ptr_type_node, NULL_TREE);
36962
36963 decl = build_fn_decl (resolver_name, type);
36964 decl_name = get_identifier (resolver_name);
36965 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
36966
36967 DECL_NAME (decl) = decl_name;
36968 TREE_USED (decl) = 1;
36969 DECL_ARTIFICIAL (decl) = 1;
36970 DECL_IGNORED_P (decl) = 0;
36971 /* IFUNC resolvers have to be externally visible. */
36972 TREE_PUBLIC (decl) = 1;
36973 DECL_UNINLINABLE (decl) = 1;
36974
36975 /* The resolver is not external; its body is generated. */
36976 DECL_EXTERNAL (decl) = 0;
36977 DECL_EXTERNAL (dispatch_decl) = 0;
36978
36979 DECL_CONTEXT (decl) = NULL_TREE;
36980 DECL_INITIAL (decl) = make_node (BLOCK);
36981 DECL_STATIC_CONSTRUCTOR (decl) = 0;
36982
36983 if (DECL_COMDAT_GROUP (default_decl)
36984 || TREE_PUBLIC (default_decl))
36985 {
36986 /* In this case, each translation unit with a call to this
36987 versioned function will put out a resolver. Ensure it
36988 is comdat to keep just one copy. */
36989 DECL_COMDAT (decl) = 1;
36990 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
36991 }
36992 /* Build result decl and add to function_decl. */
36993 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
36994 DECL_ARTIFICIAL (t) = 1;
36995 DECL_IGNORED_P (t) = 1;
36996 DECL_RESULT (decl) = t;
36997
36998 gimplify_function_tree (decl);
36999 push_cfun (DECL_STRUCT_FUNCTION (decl));
37000 *empty_bb = init_lowered_empty_function (decl, false, 0);
37001
37002 cgraph_node::add_new_function (decl, true);
37003 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37004
37005 pop_cfun ();
37006
37007 gcc_assert (dispatch_decl != NULL);
37008 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37009 DECL_ATTRIBUTES (dispatch_decl)
37010 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37011
37012 /* Create the alias for dispatch to resolver here. */
37013 /*cgraph_create_function_alias (dispatch_decl, decl);*/
37014 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37015 XDELETEVEC (resolver_name);
37016 return decl;
37017 }
37018
37019 /* Generate the dispatching code body to dispatch multi-versioned function
37020 DECL. The target hook is called to process the "target" attributes and
37021 provide the code to dispatch the right function at run-time. NODE points
37022 to the dispatcher decl whose body will be created. */
37023
37024 static tree
37025 ix86_generate_version_dispatcher_body (void *node_p)
37026 {
37027 tree resolver_decl;
37028 basic_block empty_bb;
37029 tree default_ver_decl;
37030 struct cgraph_node *versn;
37031 struct cgraph_node *node;
37032
37033 struct cgraph_function_version_info *node_version_info = NULL;
37034 struct cgraph_function_version_info *versn_info = NULL;
37035
37036 node = (cgraph_node *)node_p;
37037
37038 node_version_info = node->function_version ();
37039 gcc_assert (node->dispatcher_function
37040 && node_version_info != NULL);
37041
37042 if (node_version_info->dispatcher_resolver)
37043 return node_version_info->dispatcher_resolver;
37044
37045 /* The first version in the chain corresponds to the default version. */
37046 default_ver_decl = node_version_info->next->this_node->decl;
37047
37048 /* node is going to be an alias, so remove the finalized bit. */
37049 node->definition = false;
37050
37051 resolver_decl = make_resolver_func (default_ver_decl,
37052 node->decl, &empty_bb);
37053
37054 node_version_info->dispatcher_resolver = resolver_decl;
37055
37056 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
37057
37058 auto_vec<tree, 2> fn_ver_vec;
37059
37060 for (versn_info = node_version_info->next; versn_info;
37061 versn_info = versn_info->next)
37062 {
37063 versn = versn_info->this_node;
37064 /* Check for virtual functions here again, as by this time it should
37065 have been determined if this function needs a vtable index or
37066 not. This happens for methods in derived classes that override
37067 virtual methods in base classes but are not explicitly marked as
37068 virtual. */
37069 if (DECL_VINDEX (versn->decl))
37070 sorry ("Virtual function multiversioning not supported");
37071
37072 fn_ver_vec.safe_push (versn->decl);
37073 }
37074
37075 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
37076 cgraph_edge::rebuild_edges ();
37077 pop_cfun ();
37078 return resolver_decl;
37079 }
37080 /* This builds the processor_model struct type defined in
37081 libgcc/config/i386/cpuinfo.c. */
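/* The layout mirrors the declaration in cpuinfo.c, roughly:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/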
37082
37083 static tree
37084 build_processor_model_struct (void)
37085 {
37086 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
37087 "__cpu_features"};
37088 tree field = NULL_TREE, field_chain = NULL_TREE;
37089 int i;
37090 tree type = make_node (RECORD_TYPE);
37091
37092 /* The first 3 fields are unsigned int. */
37093 for (i = 0; i < 3; ++i)
37094 {
37095 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37096 get_identifier (field_name[i]), unsigned_type_node);
37097 if (field_chain != NULL_TREE)
37098 DECL_CHAIN (field) = field_chain;
37099 field_chain = field;
37100 }
37101
37102 /* The last field is an array of unsigned integers of size one. */
37103 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37104 get_identifier (field_name[3]),
37105 build_array_type (unsigned_type_node,
37106 build_index_type (size_one_node)));
37107 if (field_chain != NULL_TREE)
37108 DECL_CHAIN (field) = field_chain;
37109 field_chain = field;
37110
37111 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
37112 return type;
37113 }
37114
37115 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
37116
37117 static tree
37118 make_var_decl (tree type, const char *name)
37119 {
37120 tree new_decl;
37121
37122 new_decl = build_decl (UNKNOWN_LOCATION,
37123 VAR_DECL,
37124 get_identifier (name),
37125 type);
37126
37127 DECL_EXTERNAL (new_decl) = 1;
37128 TREE_STATIC (new_decl) = 1;
37129 TREE_PUBLIC (new_decl) = 1;
37130 DECL_INITIAL (new_decl) = 0;
37131 DECL_ARTIFICIAL (new_decl) = 0;
37132 DECL_PRESERVE_P (new_decl) = 1;
37133
37134 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
37135 assemble_variable (new_decl, 0, 0, 0);
37136
37137 return new_decl;
37138 }
37139
37140 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
37141 into an integer test against data defined in libgcc/config/i386/cpuinfo.c. */
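/* For example (illustrative), __builtin_cpu_supports ("avx2") folds to
   the equivalent of

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   and __builtin_cpu_is ("intel") to a comparison of
   __cpu_model.__cpu_vendor against M_INTEL.  */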
37142
37143 static tree
37144 fold_builtin_cpu (tree fndecl, tree *args)
37145 {
37146 unsigned int i;
37147 enum ix86_builtins fn_code = (enum ix86_builtins)
37148 DECL_FUNCTION_CODE (fndecl);
37149 tree param_string_cst = NULL;
37150
37151 /* This is the order of bit-fields in __processor_features in cpuinfo.c. */
37152 enum processor_features
37153 {
37154 F_CMOV = 0,
37155 F_MMX,
37156 F_POPCNT,
37157 F_SSE,
37158 F_SSE2,
37159 F_SSE3,
37160 F_SSSE3,
37161 F_SSE4_1,
37162 F_SSE4_2,
37163 F_AVX,
37164 F_AVX2,
37165 F_SSE4_A,
37166 F_FMA4,
37167 F_XOP,
37168 F_FMA,
37169 F_AVX512F,
37170 F_BMI,
37171 F_BMI2,
37172 F_AES,
37173 F_PCLMUL,
37174 F_AVX512VL,
37175 F_AVX512BW,
37176 F_AVX512DQ,
37177 F_AVX512CD,
37178 F_AVX512ER,
37179 F_AVX512PF,
37180 F_AVX512VBMI,
37181 F_AVX512IFMA,
37182 F_MAX
37183 };
37184
37185 /* These are the values for vendor types and cpu types and subtypes
37186 in cpuinfo.c. CPU types and subtypes must have the corresponding
37187 start value subtracted from them. */
37188 enum processor_model
37189 {
37190 M_INTEL = 1,
37191 M_AMD,
37192 M_CPU_TYPE_START,
37193 M_INTEL_BONNELL,
37194 M_INTEL_CORE2,
37195 M_INTEL_COREI7,
37196 M_AMDFAM10H,
37197 M_AMDFAM15H,
37198 M_INTEL_SILVERMONT,
37199 M_INTEL_KNL,
37200 M_AMD_BTVER1,
37201 M_AMD_BTVER2,
37202 M_CPU_SUBTYPE_START,
37203 M_INTEL_COREI7_NEHALEM,
37204 M_INTEL_COREI7_WESTMERE,
37205 M_INTEL_COREI7_SANDYBRIDGE,
37206 M_AMDFAM10H_BARCELONA,
37207 M_AMDFAM10H_SHANGHAI,
37208 M_AMDFAM10H_ISTANBUL,
37209 M_AMDFAM15H_BDVER1,
37210 M_AMDFAM15H_BDVER2,
37211 M_AMDFAM15H_BDVER3,
37212 M_AMDFAM15H_BDVER4,
37213 M_AMDFAM17H_ZNVER1,
37214 M_INTEL_COREI7_IVYBRIDGE,
37215 M_INTEL_COREI7_HASWELL,
37216 M_INTEL_COREI7_BROADWELL,
37217 M_INTEL_COREI7_SKYLAKE,
37218 M_INTEL_COREI7_SKYLAKE_AVX512
37219 };
37220
37221 static struct _arch_names_table
37222 {
37223 const char *const name;
37224 const enum processor_model model;
37225 }
37226 const arch_names_table[] =
37227 {
37228 {"amd", M_AMD},
37229 {"intel", M_INTEL},
37230 {"atom", M_INTEL_BONNELL},
37231 {"slm", M_INTEL_SILVERMONT},
37232 {"core2", M_INTEL_CORE2},
37233 {"corei7", M_INTEL_COREI7},
37234 {"nehalem", M_INTEL_COREI7_NEHALEM},
37235 {"westmere", M_INTEL_COREI7_WESTMERE},
37236 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
37237 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
37238 {"haswell", M_INTEL_COREI7_HASWELL},
37239 {"broadwell", M_INTEL_COREI7_BROADWELL},
37240 {"skylake", M_INTEL_COREI7_SKYLAKE},
37241 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
37242 {"bonnell", M_INTEL_BONNELL},
37243 {"silvermont", M_INTEL_SILVERMONT},
37244 {"knl", M_INTEL_KNL},
37245 {"amdfam10h", M_AMDFAM10H},
37246 {"barcelona", M_AMDFAM10H_BARCELONA},
37247 {"shanghai", M_AMDFAM10H_SHANGHAI},
37248 {"istanbul", M_AMDFAM10H_ISTANBUL},
37249 {"btver1", M_AMD_BTVER1},
37250 {"amdfam15h", M_AMDFAM15H},
37251 {"bdver1", M_AMDFAM15H_BDVER1},
37252 {"bdver2", M_AMDFAM15H_BDVER2},
37253 {"bdver3", M_AMDFAM15H_BDVER3},
37254 {"bdver4", M_AMDFAM15H_BDVER4},
37255 {"btver2", M_AMD_BTVER2},
37256 {"znver1", M_AMDFAM17H_ZNVER1},
37257 };
37258
37259 static struct _isa_names_table
37260 {
37261 const char *const name;
37262 const enum processor_features feature;
37263 }
37264 const isa_names_table[] =
37265 {
37266 {"cmov", F_CMOV},
37267 {"mmx", F_MMX},
37268 {"popcnt", F_POPCNT},
37269 {"sse", F_SSE},
37270 {"sse2", F_SSE2},
37271 {"sse3", F_SSE3},
37272 {"ssse3", F_SSSE3},
37273 {"sse4a", F_SSE4_A},
37274 {"sse4.1", F_SSE4_1},
37275 {"sse4.2", F_SSE4_2},
37276 {"avx", F_AVX},
37277 {"fma4", F_FMA4},
37278 {"xop", F_XOP},
37279 {"fma", F_FMA},
37280 {"avx2", F_AVX2},
37281 {"avx512f", F_AVX512F},
37282 {"bmi", F_BMI},
37283 {"bmi2", F_BMI2},
37284 {"aes", F_AES},
37285 {"pclmul", F_PCLMUL},
37286 {"avx512vl", F_AVX512VL},
37287 {"avx512bw", F_AVX512BW},
37288 {"avx512dq", F_AVX512DQ},
37289 {"avx512cd", F_AVX512CD},
37290 {"avx512er", F_AVX512ER},
37291 {"avx512pf", F_AVX512PF},
37292 {"avx512vbmi", F_AVX512VBMI},
37293 {"avx512ifma", F_AVX512IFMA},
37294 };
37295
37296 tree __processor_model_type = build_processor_model_struct ();
37297 tree __cpu_model_var = make_var_decl (__processor_model_type,
37298 "__cpu_model");
37299
37300
37301 varpool_node::add (__cpu_model_var);
37302
37303 gcc_assert ((args != NULL) && (*args != NULL));
37304
37305 param_string_cst = *args;
37306 while (param_string_cst
37307 && TREE_CODE (param_string_cst) != STRING_CST)
37308 {
37309 /* *args must be an expr that can contain other EXPRs leading to a
37310 STRING_CST. */
37311 if (!EXPR_P (param_string_cst))
37312 {
37313 error ("Parameter to builtin must be a string constant or literal");
37314 return integer_zero_node;
37315 }
37316 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
37317 }
37318
37319 gcc_assert (param_string_cst);
37320
37321 if (fn_code == IX86_BUILTIN_CPU_IS)
37322 {
37323 tree ref;
37324 tree field;
37325 tree final;
37326
37327 unsigned int field_val = 0;
37328 unsigned int NUM_ARCH_NAMES
37329 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
37330
37331 for (i = 0; i < NUM_ARCH_NAMES; i++)
37332 if (strcmp (arch_names_table[i].name,
37333 TREE_STRING_POINTER (param_string_cst)) == 0)
37334 break;
37335
37336 if (i == NUM_ARCH_NAMES)
37337 {
37338 error ("Parameter to builtin not valid: %s",
37339 TREE_STRING_POINTER (param_string_cst));
37340 return integer_zero_node;
37341 }
37342
37343 field = TYPE_FIELDS (__processor_model_type);
37344 field_val = arch_names_table[i].model;
37345
37346 /* CPU types are stored in the next field. */
37347 if (field_val > M_CPU_TYPE_START
37348 && field_val < M_CPU_SUBTYPE_START)
37349 {
37350 field = DECL_CHAIN (field);
37351 field_val -= M_CPU_TYPE_START;
37352 }
37353
37354 /* CPU subtypes are stored in the next field. */
37355 if (field_val > M_CPU_SUBTYPE_START)
37356 {
37357 field = DECL_CHAIN (DECL_CHAIN (field));
37358 field_val -= M_CPU_SUBTYPE_START;
37359 }
37360
37361 /* Get the appropriate field in __cpu_model. */
37362 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37363 field, NULL_TREE);
37364
37365 /* Check the value. */
37366 final = build2 (EQ_EXPR, unsigned_type_node, ref,
37367 build_int_cstu (unsigned_type_node, field_val));
37368 return build1 (CONVERT_EXPR, integer_type_node, final);
37369 }
37370 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37371 {
37372 tree ref;
37373 tree array_elt;
37374 tree field;
37375 tree final;
37376
37377 unsigned int field_val = 0;
37378 unsigned int NUM_ISA_NAMES
37379 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
37380
37381 for (i = 0; i < NUM_ISA_NAMES; i++)
37382 if (strcmp (isa_names_table[i].name,
37383 TREE_STRING_POINTER (param_string_cst)) == 0)
37384 break;
37385
37386 if (i == NUM_ISA_NAMES)
37387 {
37388 error ("Parameter to builtin not valid: %s",
37389 TREE_STRING_POINTER (param_string_cst));
37390 return integer_zero_node;
37391 }
37392
37393 field = TYPE_FIELDS (__processor_model_type);
37394 /* Get the last field, which is __cpu_features. */
37395 while (DECL_CHAIN (field))
37396 field = DECL_CHAIN (field);
37397
37398 /* Get the appropriate field: __cpu_model.__cpu_features */
37399 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37400 field, NULL_TREE);
37401
37402 /* Access the 0th element of the __cpu_features array. */
37403 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
37404 integer_zero_node, NULL_TREE, NULL_TREE);
37405
37406 field_val = (1 << isa_names_table[i].feature);
37407 /* Return __cpu_model.__cpu_features[0] & field_val. */
37408 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
37409 build_int_cstu (unsigned_type_node, field_val));
37410 return build1 (CONVERT_EXPR, integer_type_node, final);
37411 }
37412 gcc_unreachable ();
37413 }
37414
37415 static tree
37416 ix86_fold_builtin (tree fndecl, int n_args,
37417 tree *args, bool ignore ATTRIBUTE_UNUSED)
37418 {
37419 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
37420 {
37421 enum ix86_builtins fn_code = (enum ix86_builtins)
37422 DECL_FUNCTION_CODE (fndecl);
37423 if (fn_code == IX86_BUILTIN_CPU_IS
37424 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37425 {
37426 gcc_assert (n_args == 1);
37427 return fold_builtin_cpu (fndecl, args);
37428 }
37429 }
37430
37431 #ifdef SUBTARGET_FOLD_BUILTIN
37432 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
37433 #endif
37434
37435 return NULL_TREE;
37436 }
37437
37438 /* Make builtins to detect cpu type and features supported. NAME is
37439 the builtin name, CODE is the builtin code, and FTYPE is the function
37440 type of the builtin. */
37441
37442 static void
37443 make_cpu_type_builtin (const char* name, int code,
37444 enum ix86_builtin_func_type ftype, bool is_const)
37445 {
37446 tree decl;
37447 tree type;
37448
37449 type = ix86_get_builtin_func_type (ftype);
37450 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
37451 NULL, NULL_TREE);
37452 gcc_assert (decl != NULL_TREE);
37453 ix86_builtins[(int) code] = decl;
37454 TREE_READONLY (decl) = is_const;
37455 }
37456
37457 /* Make builtins to get CPU type and features supported. The created
37458 builtins are:
37459
37460 __builtin_cpu_init (), to detect cpu type and features,
37461 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
37462 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
37463 */
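/* Typical use of these builtins (illustrative):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7"))
       ...
     if (__builtin_cpu_supports ("avx2"))
       ...

   When queried from a static constructor or an IFUNC resolver,
   __builtin_cpu_init should be called first, as is done in
   dispatch_function_versions above.  */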
37464
37465 static void
37466 ix86_init_platform_type_builtins (void)
37467 {
37468 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
37469 INT_FTYPE_VOID, false);
37470 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
37471 INT_FTYPE_PCCHAR, true);
37472 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
37473 INT_FTYPE_PCCHAR, true);
37474 }
37475
37476 /* Internal method for ix86_init_builtins. */
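/* The builtins registered below back the __builtin_ms_va_* and
   __builtin_sysv_va_* interfaces; a hedged user-level sketch, assuming
   a 64-bit target:

     void f (int n, ...) __attribute__ ((ms_abi));
     void f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       (void) v;
     }
*/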
37477
37478 static void
37479 ix86_init_builtins_va_builtins_abi (void)
37480 {
37481 tree ms_va_ref, sysv_va_ref;
37482 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
37483 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
37484 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
37485 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
37486
37487 if (!TARGET_64BIT)
37488 return;
37489 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
37490 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
37491 ms_va_ref = build_reference_type (ms_va_list_type_node);
37492 sysv_va_ref =
37493 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
37494
37495 fnvoid_va_end_ms =
37496 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37497 fnvoid_va_start_ms =
37498 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37499 fnvoid_va_end_sysv =
37500 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
37501 fnvoid_va_start_sysv =
37502 build_varargs_function_type_list (void_type_node, sysv_va_ref,
37503 NULL_TREE);
37504 fnvoid_va_copy_ms =
37505 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
37506 NULL_TREE);
37507 fnvoid_va_copy_sysv =
37508 build_function_type_list (void_type_node, sysv_va_ref,
37509 sysv_va_ref, NULL_TREE);
37510
37511 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
37512 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
37513 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
37514 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
37515 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
37516 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
37517 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
37518 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37519 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
37520 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37521 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
37522 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37523 }
37524
37525 static void
37526 ix86_init_builtin_types (void)
37527 {
37528 tree float128_type_node, float80_type_node;
37529
37530 /* The __float80 type. */
37531 float80_type_node = long_double_type_node;
37532 if (TYPE_MODE (float80_type_node) != XFmode)
37533 {
37534 /* The __float80 type. */
37535 float80_type_node = make_node (REAL_TYPE);
37536
37537 TYPE_PRECISION (float80_type_node) = 80;
37538 layout_type (float80_type_node);
37539 }
37540 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
37541
37542 /* The __float128 type. */
37543 float128_type_node = make_node (REAL_TYPE);
37544 TYPE_PRECISION (float128_type_node) = 128;
37545 layout_type (float128_type_node);
37546 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
37547
37548 /* This macro is built by i386-builtin-types.awk. */
37549 DEFINE_BUILTIN_PRIMITIVE_TYPES;
37550 }
37551
37552 static void
37553 ix86_init_builtins (void)
37554 {
37555 tree t;
37556
37557 ix86_init_builtin_types ();
37558
37559 /* Builtins to get CPU type and features. */
37560 ix86_init_platform_type_builtins ();
37561
37562 /* TFmode support builtins. */
37563 def_builtin_const (0, "__builtin_infq",
37564 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
37565 def_builtin_const (0, "__builtin_huge_valq",
37566 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
37567
37568 /* We will expand them to a normal call if SSE isn't available since
37569 they are used by libgcc. */
37570 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
37571 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
37572 BUILT_IN_MD, "__fabstf2", NULL_TREE);
37573 TREE_READONLY (t) = 1;
37574 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
37575
37576 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
37577 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
37578 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
37579 TREE_READONLY (t) = 1;
37580 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
37581
37582 ix86_init_tm_builtins ();
37583 ix86_init_mmx_sse_builtins ();
37584 ix86_init_mpx_builtins ();
37585
37586 if (TARGET_LP64)
37587 ix86_init_builtins_va_builtins_abi ();
37588
37589 #ifdef SUBTARGET_INIT_BUILTINS
37590 SUBTARGET_INIT_BUILTINS;
37591 #endif
37592 }
37593
37594 /* Return the ix86 builtin for CODE. */
37595
37596 static tree
37597 ix86_builtin_decl (unsigned code, bool)
37598 {
37599 if (code >= IX86_BUILTIN_MAX)
37600 return error_mark_node;
37601
37602 return ix86_builtins[code];
37603 }
37604
37605 /* Errors in the source file can cause expand_expr to return const0_rtx
37606 where we expect a vector. To avoid crashing, use one of the vector
37607 clear instructions. */
37608 static rtx
37609 safe_vector_operand (rtx x, machine_mode mode)
37610 {
37611 if (x == const0_rtx)
37612 x = CONST0_RTX (mode);
37613 return x;
37614 }
37615
37616 /* Fix up modeless constants to fit the required mode. */
37617 static rtx
37618 fixup_modeless_constant (rtx x, machine_mode mode)
37619 {
37620 if (GET_MODE (x) == VOIDmode)
37621 x = convert_to_mode (mode, x, 1);
37622 return x;
37623 }
37624
37625 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
37626
37627 static rtx
37628 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
37629 {
37630 rtx pat;
37631 tree arg0 = CALL_EXPR_ARG (exp, 0);
37632 tree arg1 = CALL_EXPR_ARG (exp, 1);
37633 rtx op0 = expand_normal (arg0);
37634 rtx op1 = expand_normal (arg1);
37635 machine_mode tmode = insn_data[icode].operand[0].mode;
37636 machine_mode mode0 = insn_data[icode].operand[1].mode;
37637 machine_mode mode1 = insn_data[icode].operand[2].mode;
37638
37639 if (VECTOR_MODE_P (mode0))
37640 op0 = safe_vector_operand (op0, mode0);
37641 if (VECTOR_MODE_P (mode1))
37642 op1 = safe_vector_operand (op1, mode1);
37643
37644 if (optimize || !target
37645 || GET_MODE (target) != tmode
37646 || !insn_data[icode].operand[0].predicate (target, tmode))
37647 target = gen_reg_rtx (tmode);
37648
37649 if (GET_MODE (op1) == SImode && mode1 == TImode)
37650 {
37651 rtx x = gen_reg_rtx (V4SImode);
37652 emit_insn (gen_sse2_loadd (x, op1));
37653 op1 = gen_lowpart (TImode, x);
37654 }
37655
37656 if (!insn_data[icode].operand[1].predicate (op0, mode0))
37657 op0 = copy_to_mode_reg (mode0, op0);
37658 if (!insn_data[icode].operand[2].predicate (op1, mode1))
37659 op1 = copy_to_mode_reg (mode1, op1);
37660
37661 pat = GEN_FCN (icode) (target, op0, op1);
37662 if (! pat)
37663 return 0;
37664
37665 emit_insn (pat);
37666
37667 return target;
37668 }
37669
37670 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
37671
37672 static rtx
37673 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
37674 enum ix86_builtin_func_type m_type,
37675 enum rtx_code sub_code)
37676 {
37677 rtx pat;
37678 int i;
37679 int nargs;
37680 bool comparison_p = false;
37681 bool tf_p = false;
37682 bool last_arg_constant = false;
37683 int num_memory = 0;
37684 struct {
37685 rtx op;
37686 machine_mode mode;
37687 } args[4];
37688
37689 machine_mode tmode = insn_data[icode].operand[0].mode;
37690
37691 switch (m_type)
37692 {
37693 case MULTI_ARG_4_DF2_DI_I:
37694 case MULTI_ARG_4_DF2_DI_I1:
37695 case MULTI_ARG_4_SF2_SI_I:
37696 case MULTI_ARG_4_SF2_SI_I1:
37697 nargs = 4;
37698 last_arg_constant = true;
37699 break;
37700
37701 case MULTI_ARG_3_SF:
37702 case MULTI_ARG_3_DF:
37703 case MULTI_ARG_3_SF2:
37704 case MULTI_ARG_3_DF2:
37705 case MULTI_ARG_3_DI:
37706 case MULTI_ARG_3_SI:
37707 case MULTI_ARG_3_SI_DI:
37708 case MULTI_ARG_3_HI:
37709 case MULTI_ARG_3_HI_SI:
37710 case MULTI_ARG_3_QI:
37711 case MULTI_ARG_3_DI2:
37712 case MULTI_ARG_3_SI2:
37713 case MULTI_ARG_3_HI2:
37714 case MULTI_ARG_3_QI2:
37715 nargs = 3;
37716 break;
37717
37718 case MULTI_ARG_2_SF:
37719 case MULTI_ARG_2_DF:
37720 case MULTI_ARG_2_DI:
37721 case MULTI_ARG_2_SI:
37722 case MULTI_ARG_2_HI:
37723 case MULTI_ARG_2_QI:
37724 nargs = 2;
37725 break;
37726
37727 case MULTI_ARG_2_DI_IMM:
37728 case MULTI_ARG_2_SI_IMM:
37729 case MULTI_ARG_2_HI_IMM:
37730 case MULTI_ARG_2_QI_IMM:
37731 nargs = 2;
37732 last_arg_constant = true;
37733 break;
37734
37735 case MULTI_ARG_1_SF:
37736 case MULTI_ARG_1_DF:
37737 case MULTI_ARG_1_SF2:
37738 case MULTI_ARG_1_DF2:
37739 case MULTI_ARG_1_DI:
37740 case MULTI_ARG_1_SI:
37741 case MULTI_ARG_1_HI:
37742 case MULTI_ARG_1_QI:
37743 case MULTI_ARG_1_SI_DI:
37744 case MULTI_ARG_1_HI_DI:
37745 case MULTI_ARG_1_HI_SI:
37746 case MULTI_ARG_1_QI_DI:
37747 case MULTI_ARG_1_QI_SI:
37748 case MULTI_ARG_1_QI_HI:
37749 nargs = 1;
37750 break;
37751
37752 case MULTI_ARG_2_DI_CMP:
37753 case MULTI_ARG_2_SI_CMP:
37754 case MULTI_ARG_2_HI_CMP:
37755 case MULTI_ARG_2_QI_CMP:
37756 nargs = 2;
37757 comparison_p = true;
37758 break;
37759
37760 case MULTI_ARG_2_SF_TF:
37761 case MULTI_ARG_2_DF_TF:
37762 case MULTI_ARG_2_DI_TF:
37763 case MULTI_ARG_2_SI_TF:
37764 case MULTI_ARG_2_HI_TF:
37765 case MULTI_ARG_2_QI_TF:
37766 nargs = 2;
37767 tf_p = true;
37768 break;
37769
37770 default:
37771 gcc_unreachable ();
37772 }
37773
37774 if (optimize || !target
37775 || GET_MODE (target) != tmode
37776 || !insn_data[icode].operand[0].predicate (target, tmode))
37777 target = gen_reg_rtx (tmode);
37778
37779 gcc_assert (nargs <= 4);
37780
37781 for (i = 0; i < nargs; i++)
37782 {
37783 tree arg = CALL_EXPR_ARG (exp, i);
37784 rtx op = expand_normal (arg);
37785 int adjust = (comparison_p) ? 1 : 0;
37786 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
37787
37788 if (last_arg_constant && i == nargs - 1)
37789 {
37790 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
37791 {
37792 enum insn_code new_icode = icode;
37793 switch (icode)
37794 {
37795 case CODE_FOR_xop_vpermil2v2df3:
37796 case CODE_FOR_xop_vpermil2v4sf3:
37797 case CODE_FOR_xop_vpermil2v4df3:
37798 case CODE_FOR_xop_vpermil2v8sf3:
37799 error ("the last argument must be a 2-bit immediate");
37800 return gen_reg_rtx (tmode);
37801 case CODE_FOR_xop_rotlv2di3:
37802 new_icode = CODE_FOR_rotlv2di3;
37803 goto xop_rotl;
37804 case CODE_FOR_xop_rotlv4si3:
37805 new_icode = CODE_FOR_rotlv4si3;
37806 goto xop_rotl;
37807 case CODE_FOR_xop_rotlv8hi3:
37808 new_icode = CODE_FOR_rotlv8hi3;
37809 goto xop_rotl;
37810 case CODE_FOR_xop_rotlv16qi3:
37811 new_icode = CODE_FOR_rotlv16qi3;
37812 xop_rotl:
37813 if (CONST_INT_P (op))
37814 {
37815 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
37816 op = GEN_INT (INTVAL (op) & mask);
37817 gcc_checking_assert
37818 (insn_data[icode].operand[i + 1].predicate (op, mode));
37819 }
37820 else
37821 {
37822 gcc_checking_assert
37823 (nargs == 2
37824 && insn_data[new_icode].operand[0].mode == tmode
37825 && insn_data[new_icode].operand[1].mode == tmode
37826 && insn_data[new_icode].operand[2].mode == mode
37827 && insn_data[new_icode].operand[0].predicate
37828 == insn_data[icode].operand[0].predicate
37829 && insn_data[new_icode].operand[1].predicate
37830 == insn_data[icode].operand[1].predicate);
37831 icode = new_icode;
37832 goto non_constant;
37833 }
37834 break;
37835 default:
37836 gcc_unreachable ();
37837 }
37838 }
37839 }
37840 else
37841 {
37842 non_constant:
37843 if (VECTOR_MODE_P (mode))
37844 op = safe_vector_operand (op, mode);
37845
37846 /* If we aren't optimizing, only allow one memory operand to be
37847 generated. */
37848 if (memory_operand (op, mode))
37849 num_memory++;
37850
37851 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
37852
37853 if (optimize
37854 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
37855 || num_memory > 1)
37856 op = force_reg (mode, op);
37857 }
37858
37859 args[i].op = op;
37860 args[i].mode = mode;
37861 }
37862
37863 switch (nargs)
37864 {
37865 case 1:
37866 pat = GEN_FCN (icode) (target, args[0].op);
37867 break;
37868
37869 case 2:
37870 if (tf_p)
37871 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37872 GEN_INT ((int)sub_code));
37873 else if (! comparison_p)
37874 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37875 else
37876 {
37877 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
37878 args[0].op,
37879 args[1].op);
37880
37881 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
37882 }
37883 break;
37884
37885 case 3:
37886 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37887 break;
37888
37889 case 4:
37890 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
37891 break;
37892
37893 default:
37894 gcc_unreachable ();
37895 }
37896
37897 if (! pat)
37898 return 0;
37899
37900 emit_insn (pat);
37901 return target;
37902 }
37903
37904 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
37905 insns with vec_merge. */
37906
37907 static rtx
37908 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
37909 rtx target)
37910 {
37911 rtx pat;
37912 tree arg0 = CALL_EXPR_ARG (exp, 0);
37913 rtx op1, op0 = expand_normal (arg0);
37914 machine_mode tmode = insn_data[icode].operand[0].mode;
37915 machine_mode mode0 = insn_data[icode].operand[1].mode;
37916
37917 if (optimize || !target
37918 || GET_MODE (target) != tmode
37919 || !insn_data[icode].operand[0].predicate (target, tmode))
37920 target = gen_reg_rtx (tmode);
37921
37922 if (VECTOR_MODE_P (mode0))
37923 op0 = safe_vector_operand (op0, mode0);
37924
37925 if ((optimize && !register_operand (op0, mode0))
37926 || !insn_data[icode].operand[1].predicate (op0, mode0))
37927 op0 = copy_to_mode_reg (mode0, op0);
37928
37929 op1 = op0;
37930 if (!insn_data[icode].operand[2].predicate (op1, mode0))
37931 op1 = copy_to_mode_reg (mode0, op1);
37932
37933 pat = GEN_FCN (icode) (target, op0, op1);
37934 if (! pat)
37935 return 0;
37936 emit_insn (pat);
37937 return target;
37938 }
37939
37940 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
37941
37942 static rtx
37943 ix86_expand_sse_compare (const struct builtin_description *d,
37944 tree exp, rtx target, bool swap)
37945 {
37946 rtx pat;
37947 tree arg0 = CALL_EXPR_ARG (exp, 0);
37948 tree arg1 = CALL_EXPR_ARG (exp, 1);
37949 rtx op0 = expand_normal (arg0);
37950 rtx op1 = expand_normal (arg1);
37951 rtx op2;
37952 machine_mode tmode = insn_data[d->icode].operand[0].mode;
37953 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
37954 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
37955 enum rtx_code comparison = d->comparison;
37956
37957 if (VECTOR_MODE_P (mode0))
37958 op0 = safe_vector_operand (op0, mode0);
37959 if (VECTOR_MODE_P (mode1))
37960 op1 = safe_vector_operand (op1, mode1);
37961
37962 /* Swap operands if we have a comparison that isn't available in
37963 hardware. */
37964 if (swap)
37965 std::swap (op0, op1);
37966
37967 if (optimize || !target
37968 || GET_MODE (target) != tmode
37969 || !insn_data[d->icode].operand[0].predicate (target, tmode))
37970 target = gen_reg_rtx (tmode);
37971
37972 if ((optimize && !register_operand (op0, mode0))
37973 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
37974 op0 = copy_to_mode_reg (mode0, op0);
37975 if ((optimize && !register_operand (op1, mode1))
37976 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
37977 op1 = copy_to_mode_reg (mode1, op1);
37978
37979 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
37980 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
37981 if (! pat)
37982 return 0;
37983 emit_insn (pat);
37984 return target;
37985 }
37986
37987 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
37988
37989 static rtx
37990 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
37991 rtx target)
37992 {
37993 rtx pat;
37994 tree arg0 = CALL_EXPR_ARG (exp, 0);
37995 tree arg1 = CALL_EXPR_ARG (exp, 1);
37996 rtx op0 = expand_normal (arg0);
37997 rtx op1 = expand_normal (arg1);
37998 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
37999 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38000 enum rtx_code comparison = d->comparison;
38001
38002 if (VECTOR_MODE_P (mode0))
38003 op0 = safe_vector_operand (op0, mode0);
38004 if (VECTOR_MODE_P (mode1))
38005 op1 = safe_vector_operand (op1, mode1);
38006
38007 /* Swap operands if we have a comparison that isn't available in
38008 hardware. */
38009 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
38010 std::swap (op0, op1);
38011
38012 target = gen_reg_rtx (SImode);
38013 emit_move_insn (target, const0_rtx);
38014 target = gen_rtx_SUBREG (QImode, target, 0);
38015
38016 if ((optimize && !register_operand (op0, mode0))
38017 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38018 op0 = copy_to_mode_reg (mode0, op0);
38019 if ((optimize && !register_operand (op1, mode1))
38020 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38021 op1 = copy_to_mode_reg (mode1, op1);
38022
38023 pat = GEN_FCN (d->icode) (op0, op1);
38024 if (! pat)
38025 return 0;
38026 emit_insn (pat);
38027 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38028 gen_rtx_fmt_ee (comparison, QImode,
38029 SET_DEST (pat),
38030 const0_rtx)));
38031
38032 return SUBREG_REG (target);
38033 }
38034
38035 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
38036
38037 static rtx
38038 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
38039 rtx target)
38040 {
38041 rtx pat;
38042 tree arg0 = CALL_EXPR_ARG (exp, 0);
38043 rtx op1, op0 = expand_normal (arg0);
38044 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38045 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38046
38047 if (optimize || target == 0
38048 || GET_MODE (target) != tmode
38049 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38050 target = gen_reg_rtx (tmode);
38051
38052 if (VECTOR_MODE_P (mode0))
38053 op0 = safe_vector_operand (op0, mode0);
38054
38055 if ((optimize && !register_operand (op0, mode0))
38056 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38057 op0 = copy_to_mode_reg (mode0, op0);
38058
38059 op1 = GEN_INT (d->comparison);
38060
38061 pat = GEN_FCN (d->icode) (target, op0, op1);
38062 if (! pat)
38063 return 0;
38064 emit_insn (pat);
38065 return target;
38066 }
38067
38068 static rtx
38069 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
38070 tree exp, rtx target)
38071 {
38072 rtx pat;
38073 tree arg0 = CALL_EXPR_ARG (exp, 0);
38074 tree arg1 = CALL_EXPR_ARG (exp, 1);
38075 rtx op0 = expand_normal (arg0);
38076 rtx op1 = expand_normal (arg1);
38077 rtx op2;
38078 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38079 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38080 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
38081
38082 if (optimize || target == 0
38083 || GET_MODE (target) != tmode
38084 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38085 target = gen_reg_rtx (tmode);
38086
38087 op0 = safe_vector_operand (op0, mode0);
38088 op1 = safe_vector_operand (op1, mode1);
38089
38090 if ((optimize && !register_operand (op0, mode0))
38091 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38092 op0 = copy_to_mode_reg (mode0, op0);
38093 if ((optimize && !register_operand (op1, mode1))
38094 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38095 op1 = copy_to_mode_reg (mode1, op1);
38096
38097 op2 = GEN_INT (d->comparison);
38098
38099 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38100 if (! pat)
38101 return 0;
38102 emit_insn (pat);
38103 return target;
38104 }
38105
38106 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
38107
38108 static rtx
38109 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
38110 rtx target)
38111 {
38112 rtx pat;
38113 tree arg0 = CALL_EXPR_ARG (exp, 0);
38114 tree arg1 = CALL_EXPR_ARG (exp, 1);
38115 rtx op0 = expand_normal (arg0);
38116 rtx op1 = expand_normal (arg1);
38117 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38118 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38119 enum rtx_code comparison = d->comparison;
38120
38121 if (VECTOR_MODE_P (mode0))
38122 op0 = safe_vector_operand (op0, mode0);
38123 if (VECTOR_MODE_P (mode1))
38124 op1 = safe_vector_operand (op1, mode1);
38125
38126 target = gen_reg_rtx (SImode);
38127 emit_move_insn (target, const0_rtx);
38128 target = gen_rtx_SUBREG (QImode, target, 0);
38129
38130 if ((optimize && !register_operand (op0, mode0))
38131 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38132 op0 = copy_to_mode_reg (mode0, op0);
38133 if ((optimize && !register_operand (op1, mode1))
38134 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38135 op1 = copy_to_mode_reg (mode1, op1);
38136
38137 pat = GEN_FCN (d->icode) (op0, op1);
38138 if (! pat)
38139 return 0;
38140 emit_insn (pat);
38141 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38142 gen_rtx_fmt_ee (comparison, QImode,
38143 SET_DEST (pat),
38144 const0_rtx)));
38145
38146 return SUBREG_REG (target);
38147 }
38148
38149 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
38150
38151 static rtx
38152 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
38153 tree exp, rtx target)
38154 {
38155 rtx pat;
38156 tree arg0 = CALL_EXPR_ARG (exp, 0);
38157 tree arg1 = CALL_EXPR_ARG (exp, 1);
38158 tree arg2 = CALL_EXPR_ARG (exp, 2);
38159 tree arg3 = CALL_EXPR_ARG (exp, 3);
38160 tree arg4 = CALL_EXPR_ARG (exp, 4);
38161 rtx scratch0, scratch1;
38162 rtx op0 = expand_normal (arg0);
38163 rtx op1 = expand_normal (arg1);
38164 rtx op2 = expand_normal (arg2);
38165 rtx op3 = expand_normal (arg3);
38166 rtx op4 = expand_normal (arg4);
38167 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
38168
38169 tmode0 = insn_data[d->icode].operand[0].mode;
38170 tmode1 = insn_data[d->icode].operand[1].mode;
38171 modev2 = insn_data[d->icode].operand[2].mode;
38172 modei3 = insn_data[d->icode].operand[3].mode;
38173 modev4 = insn_data[d->icode].operand[4].mode;
38174 modei5 = insn_data[d->icode].operand[5].mode;
38175 modeimm = insn_data[d->icode].operand[6].mode;
38176
38177 if (VECTOR_MODE_P (modev2))
38178 op0 = safe_vector_operand (op0, modev2);
38179 if (VECTOR_MODE_P (modev4))
38180 op2 = safe_vector_operand (op2, modev4);
38181
38182 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38183 op0 = copy_to_mode_reg (modev2, op0);
38184 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
38185 op1 = copy_to_mode_reg (modei3, op1);
38186 if ((optimize && !register_operand (op2, modev4))
38187 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
38188 op2 = copy_to_mode_reg (modev4, op2);
38189 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
38190 op3 = copy_to_mode_reg (modei5, op3);
38191
38192 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
38193 {
38194 error ("the fifth argument must be an 8-bit immediate");
38195 return const0_rtx;
38196 }
38197
38198 if (d->code == IX86_BUILTIN_PCMPESTRI128)
38199 {
38200 if (optimize || !target
38201 || GET_MODE (target) != tmode0
38202 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38203 target = gen_reg_rtx (tmode0);
38204
38205 scratch1 = gen_reg_rtx (tmode1);
38206
38207 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
38208 }
38209 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
38210 {
38211 if (optimize || !target
38212 || GET_MODE (target) != tmode1
38213 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38214 target = gen_reg_rtx (tmode1);
38215
38216 scratch0 = gen_reg_rtx (tmode0);
38217
38218 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
38219 }
38220 else
38221 {
38222 gcc_assert (d->flag);
38223
38224 scratch0 = gen_reg_rtx (tmode0);
38225 scratch1 = gen_reg_rtx (tmode1);
38226
38227 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
38228 }
38229
38230 if (! pat)
38231 return 0;
38232
38233 emit_insn (pat);
38234
38235 if (d->flag)
38236 {
38237 target = gen_reg_rtx (SImode);
38238 emit_move_insn (target, const0_rtx);
38239 target = gen_rtx_SUBREG (QImode, target, 0);
38240
38241 emit_insn
38242 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38243 gen_rtx_fmt_ee (EQ, QImode,
38244 gen_rtx_REG ((machine_mode) d->flag,
38245 FLAGS_REG),
38246 const0_rtx)));
38247 return SUBREG_REG (target);
38248 }
38249 else
38250 return target;
38251 }
38252
38253
38254 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
38255
38256 static rtx
38257 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
38258 tree exp, rtx target)
38259 {
38260 rtx pat;
38261 tree arg0 = CALL_EXPR_ARG (exp, 0);
38262 tree arg1 = CALL_EXPR_ARG (exp, 1);
38263 tree arg2 = CALL_EXPR_ARG (exp, 2);
38264 rtx scratch0, scratch1;
38265 rtx op0 = expand_normal (arg0);
38266 rtx op1 = expand_normal (arg1);
38267 rtx op2 = expand_normal (arg2);
38268 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
38269
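  /* Same operand layout as the pcmpestr case above, minus the explicit
     length operands: 0/1 are the index and mask results, 2/3 the source
     vectors, 4 the control immediate.  */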
38270 tmode0 = insn_data[d->icode].operand[0].mode;
38271 tmode1 = insn_data[d->icode].operand[1].mode;
38272 modev2 = insn_data[d->icode].operand[2].mode;
38273 modev3 = insn_data[d->icode].operand[3].mode;
38274 modeimm = insn_data[d->icode].operand[4].mode;
38275
38276 if (VECTOR_MODE_P (modev2))
38277 op0 = safe_vector_operand (op0, modev2);
38278 if (VECTOR_MODE_P (modev3))
38279 op1 = safe_vector_operand (op1, modev3);
38280
38281 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38282 op0 = copy_to_mode_reg (modev2, op0);
38283 if ((optimize && !register_operand (op1, modev3))
38284 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
38285 op1 = copy_to_mode_reg (modev3, op1);
38286
38287 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
38288 {
38289 error ("the third argument must be an 8-bit immediate");
38290 return const0_rtx;
38291 }
38292
38293 if (d->code == IX86_BUILTIN_PCMPISTRI128)
38294 {
38295 if (optimize || !target
38296 || GET_MODE (target) != tmode0
38297 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38298 target = gen_reg_rtx (tmode0);
38299
38300 scratch1 = gen_reg_rtx (tmode1);
38301
38302 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
38303 }
38304 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
38305 {
38306 if (optimize || !target
38307 || GET_MODE (target) != tmode1
38308 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38309 target = gen_reg_rtx (tmode1);
38310
38311 scratch0 = gen_reg_rtx (tmode0);
38312
38313 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
38314 }
38315 else
38316 {
38317 gcc_assert (d->flag);
38318
38319 scratch0 = gen_reg_rtx (tmode0);
38320 scratch1 = gen_reg_rtx (tmode1);
38321
38322 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
38323 }
38324
38325 if (! pat)
38326 return 0;
38327
38328 emit_insn (pat);
38329
38330 if (d->flag)
38331 {
38332 target = gen_reg_rtx (SImode);
38333 emit_move_insn (target, const0_rtx);
38334 target = gen_rtx_SUBREG (QImode, target, 0);
38335
38336 emit_insn
38337 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38338 gen_rtx_fmt_ee (EQ, QImode,
38339 gen_rtx_REG ((machine_mode) d->flag,
38340 FLAGS_REG),
38341 const0_rtx)));
38342 return SUBREG_REG (target);
38343 }
38344 else
38345 return target;
38346 }
38347
38348 /* Subroutine of ix86_expand_builtin to take care of insns with
38349 variable number of operands. */
38350
38351 static rtx
38352 ix86_expand_args_builtin (const struct builtin_description *d,
38353 tree exp, rtx target)
38354 {
38355 rtx pat, real_target;
38356 unsigned int i, nargs;
38357 unsigned int nargs_constant = 0;
38358 unsigned int mask_pos = 0;
38359 int num_memory = 0;
38360 struct
38361 {
38362 rtx op;
38363 machine_mode mode;
38364 } args[6];
38365 bool last_arg_count = false;
38366 enum insn_code icode = d->icode;
38367 const struct insn_data_d *insn_p = &insn_data[icode];
38368 machine_mode tmode = insn_p->operand[0].mode;
38369 machine_mode rmode = VOIDmode;
38370 bool swap = false;
38371 enum rtx_code comparison = d->comparison;
38372
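  /* Classify the builtin by its function type: NARGS is the number of
     source operands and NARGS_CONSTANT how many of the trailing ones
     must be immediates; a nonzero MASK_POS moves that immediate check
     in front of the trailing mask operands, and RMODE, when it differs
     from the pattern's destination mode, is the mode in which the
     result is handed back through a subreg.  */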
38373 switch ((enum ix86_builtin_func_type) d->flag)
38374 {
38375 case V2DF_FTYPE_V2DF_ROUND:
38376 case V4DF_FTYPE_V4DF_ROUND:
38377 case V4SF_FTYPE_V4SF_ROUND:
38378 case V8SF_FTYPE_V8SF_ROUND:
38379 case V4SI_FTYPE_V4SF_ROUND:
38380 case V8SI_FTYPE_V8SF_ROUND:
38381 return ix86_expand_sse_round (d, exp, target);
38382 case V4SI_FTYPE_V2DF_V2DF_ROUND:
38383 case V8SI_FTYPE_V4DF_V4DF_ROUND:
38384 case V16SI_FTYPE_V8DF_V8DF_ROUND:
38385 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
38386 case INT_FTYPE_V8SF_V8SF_PTEST:
38387 case INT_FTYPE_V4DI_V4DI_PTEST:
38388 case INT_FTYPE_V4DF_V4DF_PTEST:
38389 case INT_FTYPE_V4SF_V4SF_PTEST:
38390 case INT_FTYPE_V2DI_V2DI_PTEST:
38391 case INT_FTYPE_V2DF_V2DF_PTEST:
38392 return ix86_expand_sse_ptest (d, exp, target);
38393 case FLOAT128_FTYPE_FLOAT128:
38394 case FLOAT_FTYPE_FLOAT:
38395 case INT_FTYPE_INT:
38396 case UINT64_FTYPE_INT:
38397 case UINT16_FTYPE_UINT16:
38398 case INT64_FTYPE_INT64:
38399 case INT64_FTYPE_V4SF:
38400 case INT64_FTYPE_V2DF:
38401 case INT_FTYPE_V16QI:
38402 case INT_FTYPE_V8QI:
38403 case INT_FTYPE_V8SF:
38404 case INT_FTYPE_V4DF:
38405 case INT_FTYPE_V4SF:
38406 case INT_FTYPE_V2DF:
38407 case INT_FTYPE_V32QI:
38408 case V16QI_FTYPE_V16QI:
38409 case V8SI_FTYPE_V8SF:
38410 case V8SI_FTYPE_V4SI:
38411 case V8HI_FTYPE_V8HI:
38412 case V8HI_FTYPE_V16QI:
38413 case V8QI_FTYPE_V8QI:
38414 case V8SF_FTYPE_V8SF:
38415 case V8SF_FTYPE_V8SI:
38416 case V8SF_FTYPE_V4SF:
38417 case V8SF_FTYPE_V8HI:
38418 case V4SI_FTYPE_V4SI:
38419 case V4SI_FTYPE_V16QI:
38420 case V4SI_FTYPE_V4SF:
38421 case V4SI_FTYPE_V8SI:
38422 case V4SI_FTYPE_V8HI:
38423 case V4SI_FTYPE_V4DF:
38424 case V4SI_FTYPE_V2DF:
38425 case V4HI_FTYPE_V4HI:
38426 case V4DF_FTYPE_V4DF:
38427 case V4DF_FTYPE_V4SI:
38428 case V4DF_FTYPE_V4SF:
38429 case V4DF_FTYPE_V2DF:
38430 case V4SF_FTYPE_V4SF:
38431 case V4SF_FTYPE_V4SI:
38432 case V4SF_FTYPE_V8SF:
38433 case V4SF_FTYPE_V4DF:
38434 case V4SF_FTYPE_V8HI:
38435 case V4SF_FTYPE_V2DF:
38436 case V2DI_FTYPE_V2DI:
38437 case V2DI_FTYPE_V16QI:
38438 case V2DI_FTYPE_V8HI:
38439 case V2DI_FTYPE_V4SI:
38440 case V2DF_FTYPE_V2DF:
38441 case V2DF_FTYPE_V4SI:
38442 case V2DF_FTYPE_V4DF:
38443 case V2DF_FTYPE_V4SF:
38444 case V2DF_FTYPE_V2SI:
38445 case V2SI_FTYPE_V2SI:
38446 case V2SI_FTYPE_V4SF:
38447 case V2SI_FTYPE_V2SF:
38448 case V2SI_FTYPE_V2DF:
38449 case V2SF_FTYPE_V2SF:
38450 case V2SF_FTYPE_V2SI:
38451 case V32QI_FTYPE_V32QI:
38452 case V32QI_FTYPE_V16QI:
38453 case V16HI_FTYPE_V16HI:
38454 case V16HI_FTYPE_V8HI:
38455 case V8SI_FTYPE_V8SI:
38456 case V16HI_FTYPE_V16QI:
38457 case V8SI_FTYPE_V16QI:
38458 case V4DI_FTYPE_V16QI:
38459 case V8SI_FTYPE_V8HI:
38460 case V4DI_FTYPE_V8HI:
38461 case V4DI_FTYPE_V4SI:
38462 case V4DI_FTYPE_V2DI:
38463 case UHI_FTYPE_UHI:
38464 case UHI_FTYPE_V16QI:
38465 case USI_FTYPE_V32QI:
38466 case UDI_FTYPE_V64QI:
38467 case V16QI_FTYPE_UHI:
38468 case V32QI_FTYPE_USI:
38469 case V64QI_FTYPE_UDI:
38470 case V8HI_FTYPE_UQI:
38471 case V16HI_FTYPE_UHI:
38472 case V32HI_FTYPE_USI:
38473 case V4SI_FTYPE_UQI:
38474 case V8SI_FTYPE_UQI:
38475 case V4SI_FTYPE_UHI:
38476 case V8SI_FTYPE_UHI:
38477 case UQI_FTYPE_V8HI:
38478 case UHI_FTYPE_V16HI:
38479 case USI_FTYPE_V32HI:
38480 case UQI_FTYPE_V4SI:
38481 case UQI_FTYPE_V8SI:
38482 case UHI_FTYPE_V16SI:
38483 case UQI_FTYPE_V2DI:
38484 case UQI_FTYPE_V4DI:
38485 case UQI_FTYPE_V8DI:
38486 case V16SI_FTYPE_UHI:
38487 case V2DI_FTYPE_UQI:
38488 case V4DI_FTYPE_UQI:
38489 case V16SI_FTYPE_INT:
38490 case V16SF_FTYPE_V8SF:
38491 case V16SI_FTYPE_V8SI:
38492 case V16SF_FTYPE_V4SF:
38493 case V16SI_FTYPE_V4SI:
38494 case V16SF_FTYPE_V16SF:
38495 case V8DI_FTYPE_UQI:
38496 case V8DF_FTYPE_V4DF:
38497 case V8DF_FTYPE_V2DF:
38498 case V8DF_FTYPE_V8DF:
38499 nargs = 1;
38500 break;
38501 case V4SF_FTYPE_V4SF_VEC_MERGE:
38502 case V2DF_FTYPE_V2DF_VEC_MERGE:
38503 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
38504 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
38505 case V16QI_FTYPE_V16QI_V16QI:
38506 case V16QI_FTYPE_V8HI_V8HI:
38507 case V16SF_FTYPE_V16SF_V16SF:
38508 case V8QI_FTYPE_V8QI_V8QI:
38509 case V8QI_FTYPE_V4HI_V4HI:
38510 case V8HI_FTYPE_V8HI_V8HI:
38511 case V8HI_FTYPE_V16QI_V16QI:
38512 case V8HI_FTYPE_V4SI_V4SI:
38513 case V8SF_FTYPE_V8SF_V8SF:
38514 case V8SF_FTYPE_V8SF_V8SI:
38515 case V8DF_FTYPE_V8DF_V8DF:
38516 case V4SI_FTYPE_V4SI_V4SI:
38517 case V4SI_FTYPE_V8HI_V8HI:
38518 case V4SI_FTYPE_V2DF_V2DF:
38519 case V4HI_FTYPE_V4HI_V4HI:
38520 case V4HI_FTYPE_V8QI_V8QI:
38521 case V4HI_FTYPE_V2SI_V2SI:
38522 case V4DF_FTYPE_V4DF_V4DF:
38523 case V4DF_FTYPE_V4DF_V4DI:
38524 case V4SF_FTYPE_V4SF_V4SF:
38525 case V4SF_FTYPE_V4SF_V4SI:
38526 case V4SF_FTYPE_V4SF_V2SI:
38527 case V4SF_FTYPE_V4SF_V2DF:
38528 case V4SF_FTYPE_V4SF_UINT:
38529 case V4SF_FTYPE_V4SF_DI:
38530 case V4SF_FTYPE_V4SF_SI:
38531 case V2DI_FTYPE_V2DI_V2DI:
38532 case V2DI_FTYPE_V16QI_V16QI:
38533 case V2DI_FTYPE_V4SI_V4SI:
38534 case V2DI_FTYPE_V2DI_V16QI:
38535 case V2SI_FTYPE_V2SI_V2SI:
38536 case V2SI_FTYPE_V4HI_V4HI:
38537 case V2SI_FTYPE_V2SF_V2SF:
38538 case V2DF_FTYPE_V2DF_V2DF:
38539 case V2DF_FTYPE_V2DF_V4SF:
38540 case V2DF_FTYPE_V2DF_V2DI:
38541 case V2DF_FTYPE_V2DF_DI:
38542 case V2DF_FTYPE_V2DF_SI:
38543 case V2DF_FTYPE_V2DF_UINT:
38544 case V2SF_FTYPE_V2SF_V2SF:
38545 case V1DI_FTYPE_V1DI_V1DI:
38546 case V1DI_FTYPE_V8QI_V8QI:
38547 case V1DI_FTYPE_V2SI_V2SI:
38548 case V32QI_FTYPE_V16HI_V16HI:
38549 case V16HI_FTYPE_V8SI_V8SI:
38550 case V32QI_FTYPE_V32QI_V32QI:
38551 case V16HI_FTYPE_V32QI_V32QI:
38552 case V16HI_FTYPE_V16HI_V16HI:
38553 case V8SI_FTYPE_V4DF_V4DF:
38554 case V8SI_FTYPE_V8SI_V8SI:
38555 case V8SI_FTYPE_V16HI_V16HI:
38556 case V4DI_FTYPE_V4DI_V4DI:
38557 case V4DI_FTYPE_V8SI_V8SI:
38558 case V8DI_FTYPE_V64QI_V64QI:
38559 if (comparison == UNKNOWN)
38560 return ix86_expand_binop_builtin (icode, exp, target);
38561 nargs = 2;
38562 break;
38563 case V4SF_FTYPE_V4SF_V4SF_SWAP:
38564 case V2DF_FTYPE_V2DF_V2DF_SWAP:
38565 gcc_assert (comparison != UNKNOWN);
38566 nargs = 2;
38567 swap = true;
38568 break;
38569 case V16HI_FTYPE_V16HI_V8HI_COUNT:
38570 case V16HI_FTYPE_V16HI_SI_COUNT:
38571 case V8SI_FTYPE_V8SI_V4SI_COUNT:
38572 case V8SI_FTYPE_V8SI_SI_COUNT:
38573 case V4DI_FTYPE_V4DI_V2DI_COUNT:
38574 case V4DI_FTYPE_V4DI_INT_COUNT:
38575 case V8HI_FTYPE_V8HI_V8HI_COUNT:
38576 case V8HI_FTYPE_V8HI_SI_COUNT:
38577 case V4SI_FTYPE_V4SI_V4SI_COUNT:
38578 case V4SI_FTYPE_V4SI_SI_COUNT:
38579 case V4HI_FTYPE_V4HI_V4HI_COUNT:
38580 case V4HI_FTYPE_V4HI_SI_COUNT:
38581 case V2DI_FTYPE_V2DI_V2DI_COUNT:
38582 case V2DI_FTYPE_V2DI_SI_COUNT:
38583 case V2SI_FTYPE_V2SI_V2SI_COUNT:
38584 case V2SI_FTYPE_V2SI_SI_COUNT:
38585 case V1DI_FTYPE_V1DI_V1DI_COUNT:
38586 case V1DI_FTYPE_V1DI_SI_COUNT:
38587 nargs = 2;
38588 last_arg_count = true;
38589 break;
38590 case UINT64_FTYPE_UINT64_UINT64:
38591 case UINT_FTYPE_UINT_UINT:
38592 case UINT_FTYPE_UINT_USHORT:
38593 case UINT_FTYPE_UINT_UCHAR:
38594 case UINT16_FTYPE_UINT16_INT:
38595 case UINT8_FTYPE_UINT8_INT:
38596 case UHI_FTYPE_UHI_UHI:
38597 case USI_FTYPE_USI_USI:
38598 case UDI_FTYPE_UDI_UDI:
38599 case V16SI_FTYPE_V8DF_V8DF:
38600 nargs = 2;
38601 break;
38602 case V2DI_FTYPE_V2DI_INT_CONVERT:
38603 nargs = 2;
38604 rmode = V1TImode;
38605 nargs_constant = 1;
38606 break;
38607 case V4DI_FTYPE_V4DI_INT_CONVERT:
38608 nargs = 2;
38609 rmode = V2TImode;
38610 nargs_constant = 1;
38611 break;
38612 case V8DI_FTYPE_V8DI_INT_CONVERT:
38613 nargs = 2;
38614 rmode = V4TImode;
38615 nargs_constant = 1;
38616 break;
38617 case V8HI_FTYPE_V8HI_INT:
38618 case V8HI_FTYPE_V8SF_INT:
38619 case V16HI_FTYPE_V16SF_INT:
38620 case V8HI_FTYPE_V4SF_INT:
38621 case V8SF_FTYPE_V8SF_INT:
38622 case V4SF_FTYPE_V16SF_INT:
38623 case V16SF_FTYPE_V16SF_INT:
38624 case V4SI_FTYPE_V4SI_INT:
38625 case V4SI_FTYPE_V8SI_INT:
38626 case V4HI_FTYPE_V4HI_INT:
38627 case V4DF_FTYPE_V4DF_INT:
38628 case V4DF_FTYPE_V8DF_INT:
38629 case V4SF_FTYPE_V4SF_INT:
38630 case V4SF_FTYPE_V8SF_INT:
38631 case V2DI_FTYPE_V2DI_INT:
38632 case V2DF_FTYPE_V2DF_INT:
38633 case V2DF_FTYPE_V4DF_INT:
38634 case V16HI_FTYPE_V16HI_INT:
38635 case V8SI_FTYPE_V8SI_INT:
38636 case V16SI_FTYPE_V16SI_INT:
38637 case V4SI_FTYPE_V16SI_INT:
38638 case V4DI_FTYPE_V4DI_INT:
38639 case V2DI_FTYPE_V4DI_INT:
38640 case V4DI_FTYPE_V8DI_INT:
38641 case QI_FTYPE_V4SF_INT:
38642 case QI_FTYPE_V2DF_INT:
38643 nargs = 2;
38644 nargs_constant = 1;
38645 break;
38646 case V16QI_FTYPE_V16QI_V16QI_V16QI:
38647 case V8SF_FTYPE_V8SF_V8SF_V8SF:
38648 case V4DF_FTYPE_V4DF_V4DF_V4DF:
38649 case V4SF_FTYPE_V4SF_V4SF_V4SF:
38650 case V2DF_FTYPE_V2DF_V2DF_V2DF:
38651 case V32QI_FTYPE_V32QI_V32QI_V32QI:
38652 case UHI_FTYPE_V16SI_V16SI_UHI:
38653 case UQI_FTYPE_V8DI_V8DI_UQI:
38654 case V16HI_FTYPE_V16SI_V16HI_UHI:
38655 case V16QI_FTYPE_V16SI_V16QI_UHI:
38656 case V16QI_FTYPE_V8DI_V16QI_UQI:
38657 case V16SF_FTYPE_V16SF_V16SF_UHI:
38658 case V16SF_FTYPE_V4SF_V16SF_UHI:
38659 case V16SI_FTYPE_SI_V16SI_UHI:
38660 case V16SI_FTYPE_V16HI_V16SI_UHI:
38661 case V16SI_FTYPE_V16QI_V16SI_UHI:
38662 case V8SF_FTYPE_V4SF_V8SF_UQI:
38663 case V4DF_FTYPE_V2DF_V4DF_UQI:
38664 case V8SI_FTYPE_V4SI_V8SI_UQI:
38665 case V8SI_FTYPE_SI_V8SI_UQI:
38666 case V4SI_FTYPE_V4SI_V4SI_UQI:
38667 case V4SI_FTYPE_SI_V4SI_UQI:
38668 case V4DI_FTYPE_V2DI_V4DI_UQI:
38669 case V4DI_FTYPE_DI_V4DI_UQI:
38670 case V2DI_FTYPE_V2DI_V2DI_UQI:
38671 case V2DI_FTYPE_DI_V2DI_UQI:
38672 case V64QI_FTYPE_V64QI_V64QI_UDI:
38673 case V64QI_FTYPE_V16QI_V64QI_UDI:
38674 case V64QI_FTYPE_QI_V64QI_UDI:
38675 case V32QI_FTYPE_V32QI_V32QI_USI:
38676 case V32QI_FTYPE_V16QI_V32QI_USI:
38677 case V32QI_FTYPE_QI_V32QI_USI:
38678 case V16QI_FTYPE_V16QI_V16QI_UHI:
38679 case V16QI_FTYPE_QI_V16QI_UHI:
38680 case V32HI_FTYPE_V8HI_V32HI_USI:
38681 case V32HI_FTYPE_HI_V32HI_USI:
38682 case V16HI_FTYPE_V8HI_V16HI_UHI:
38683 case V16HI_FTYPE_HI_V16HI_UHI:
38684 case V8HI_FTYPE_V8HI_V8HI_UQI:
38685 case V8HI_FTYPE_HI_V8HI_UQI:
38686 case V8SF_FTYPE_V8HI_V8SF_UQI:
38687 case V4SF_FTYPE_V8HI_V4SF_UQI:
38688 case V8SI_FTYPE_V8SF_V8SI_UQI:
38689 case V4SI_FTYPE_V4SF_V4SI_UQI:
38690 case V4DI_FTYPE_V4SF_V4DI_UQI:
38691 case V2DI_FTYPE_V4SF_V2DI_UQI:
38692 case V4SF_FTYPE_V4DI_V4SF_UQI:
38693 case V4SF_FTYPE_V2DI_V4SF_UQI:
38694 case V4DF_FTYPE_V4DI_V4DF_UQI:
38695 case V2DF_FTYPE_V2DI_V2DF_UQI:
38696 case V16QI_FTYPE_V8HI_V16QI_UQI:
38697 case V16QI_FTYPE_V16HI_V16QI_UHI:
38698 case V16QI_FTYPE_V4SI_V16QI_UQI:
38699 case V16QI_FTYPE_V8SI_V16QI_UQI:
38700 case V8HI_FTYPE_V4SI_V8HI_UQI:
38701 case V8HI_FTYPE_V8SI_V8HI_UQI:
38702 case V16QI_FTYPE_V2DI_V16QI_UQI:
38703 case V16QI_FTYPE_V4DI_V16QI_UQI:
38704 case V8HI_FTYPE_V2DI_V8HI_UQI:
38705 case V8HI_FTYPE_V4DI_V8HI_UQI:
38706 case V4SI_FTYPE_V2DI_V4SI_UQI:
38707 case V4SI_FTYPE_V4DI_V4SI_UQI:
38708 case V32QI_FTYPE_V32HI_V32QI_USI:
38709 case UHI_FTYPE_V16QI_V16QI_UHI:
38710 case USI_FTYPE_V32QI_V32QI_USI:
38711 case UDI_FTYPE_V64QI_V64QI_UDI:
38712 case UQI_FTYPE_V8HI_V8HI_UQI:
38713 case UHI_FTYPE_V16HI_V16HI_UHI:
38714 case USI_FTYPE_V32HI_V32HI_USI:
38715 case UQI_FTYPE_V4SI_V4SI_UQI:
38716 case UQI_FTYPE_V8SI_V8SI_UQI:
38717 case UQI_FTYPE_V2DI_V2DI_UQI:
38718 case UQI_FTYPE_V4DI_V4DI_UQI:
38719 case V4SF_FTYPE_V2DF_V4SF_UQI:
38720 case V4SF_FTYPE_V4DF_V4SF_UQI:
38721 case V16SI_FTYPE_V16SI_V16SI_UHI:
38722 case V16SI_FTYPE_V4SI_V16SI_UHI:
38723 case V2DI_FTYPE_V4SI_V2DI_UQI:
38724 case V2DI_FTYPE_V8HI_V2DI_UQI:
38725 case V2DI_FTYPE_V16QI_V2DI_UQI:
38726 case V4DI_FTYPE_V4DI_V4DI_UQI:
38727 case V4DI_FTYPE_V4SI_V4DI_UQI:
38728 case V4DI_FTYPE_V8HI_V4DI_UQI:
38729 case V4DI_FTYPE_V16QI_V4DI_UQI:
38730 case V4DI_FTYPE_V4DF_V4DI_UQI:
38731 case V2DI_FTYPE_V2DF_V2DI_UQI:
38732 case V4SI_FTYPE_V4DF_V4SI_UQI:
38733 case V4SI_FTYPE_V2DF_V4SI_UQI:
38734 case V4SI_FTYPE_V8HI_V4SI_UQI:
38735 case V4SI_FTYPE_V16QI_V4SI_UQI:
38736 case V4DI_FTYPE_V4DI_V4DI_V4DI:
38737 case V8DF_FTYPE_V2DF_V8DF_UQI:
38738 case V8DF_FTYPE_V4DF_V8DF_UQI:
38739 case V8DF_FTYPE_V8DF_V8DF_UQI:
38740 case V8SF_FTYPE_V8SF_V8SF_UQI:
38741 case V8SF_FTYPE_V8SI_V8SF_UQI:
38742 case V4DF_FTYPE_V4DF_V4DF_UQI:
38743 case V4SF_FTYPE_V4SF_V4SF_UQI:
38744 case V2DF_FTYPE_V2DF_V2DF_UQI:
38745 case V2DF_FTYPE_V4SF_V2DF_UQI:
38746 case V2DF_FTYPE_V4SI_V2DF_UQI:
38747 case V4SF_FTYPE_V4SI_V4SF_UQI:
38748 case V4DF_FTYPE_V4SF_V4DF_UQI:
38749 case V4DF_FTYPE_V4SI_V4DF_UQI:
38750 case V8SI_FTYPE_V8SI_V8SI_UQI:
38751 case V8SI_FTYPE_V8HI_V8SI_UQI:
38752 case V8SI_FTYPE_V16QI_V8SI_UQI:
38753 case V8DF_FTYPE_V8SI_V8DF_UQI:
38754 case V8DI_FTYPE_DI_V8DI_UQI:
38755 case V16SF_FTYPE_V8SF_V16SF_UHI:
38756 case V16SI_FTYPE_V8SI_V16SI_UHI:
38757 case V16HI_FTYPE_V16HI_V16HI_UHI:
38758 case V8HI_FTYPE_V16QI_V8HI_UQI:
38759 case V16HI_FTYPE_V16QI_V16HI_UHI:
38760 case V32HI_FTYPE_V32HI_V32HI_USI:
38761 case V32HI_FTYPE_V32QI_V32HI_USI:
38762 case V8DI_FTYPE_V16QI_V8DI_UQI:
38763 case V8DI_FTYPE_V2DI_V8DI_UQI:
38764 case V8DI_FTYPE_V4DI_V8DI_UQI:
38765 case V8DI_FTYPE_V8DI_V8DI_UQI:
38766 case V8DI_FTYPE_V8HI_V8DI_UQI:
38767 case V8DI_FTYPE_V8SI_V8DI_UQI:
38768 case V8HI_FTYPE_V8DI_V8HI_UQI:
38769 case V8SI_FTYPE_V8DI_V8SI_UQI:
38770 case V4SI_FTYPE_V4SI_V4SI_V4SI:
38771 nargs = 3;
38772 break;
38773 case V32QI_FTYPE_V32QI_V32QI_INT:
38774 case V16HI_FTYPE_V16HI_V16HI_INT:
38775 case V16QI_FTYPE_V16QI_V16QI_INT:
38776 case V4DI_FTYPE_V4DI_V4DI_INT:
38777 case V8HI_FTYPE_V8HI_V8HI_INT:
38778 case V8SI_FTYPE_V8SI_V8SI_INT:
38779 case V8SI_FTYPE_V8SI_V4SI_INT:
38780 case V8SF_FTYPE_V8SF_V8SF_INT:
38781 case V8SF_FTYPE_V8SF_V4SF_INT:
38782 case V4SI_FTYPE_V4SI_V4SI_INT:
38783 case V4DF_FTYPE_V4DF_V4DF_INT:
38784 case V16SF_FTYPE_V16SF_V16SF_INT:
38785 case V16SF_FTYPE_V16SF_V4SF_INT:
38786 case V16SI_FTYPE_V16SI_V4SI_INT:
38787 case V4DF_FTYPE_V4DF_V2DF_INT:
38788 case V4SF_FTYPE_V4SF_V4SF_INT:
38789 case V2DI_FTYPE_V2DI_V2DI_INT:
38790 case V4DI_FTYPE_V4DI_V2DI_INT:
38791 case V2DF_FTYPE_V2DF_V2DF_INT:
38792 case UQI_FTYPE_V8DI_V8UDI_INT:
38793 case UQI_FTYPE_V8DF_V8DF_INT:
38794 case UQI_FTYPE_V2DF_V2DF_INT:
38795 case UQI_FTYPE_V4SF_V4SF_INT:
38796 case UHI_FTYPE_V16SI_V16SI_INT:
38797 case UHI_FTYPE_V16SF_V16SF_INT:
38798 nargs = 3;
38799 nargs_constant = 1;
38800 break;
38801 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
38802 nargs = 3;
38803 rmode = V4DImode;
38804 nargs_constant = 1;
38805 break;
38806 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
38807 nargs = 3;
38808 rmode = V2DImode;
38809 nargs_constant = 1;
38810 break;
38811 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
38812 nargs = 3;
38813 rmode = DImode;
38814 nargs_constant = 1;
38815 break;
38816 case V2DI_FTYPE_V2DI_UINT_UINT:
38817 nargs = 3;
38818 nargs_constant = 2;
38819 break;
38820 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
38821 nargs = 3;
38822 rmode = V8DImode;
38823 nargs_constant = 1;
38824 break;
38825 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
38826 nargs = 5;
38827 rmode = V8DImode;
38828 mask_pos = 2;
38829 nargs_constant = 1;
38830 break;
38831 case QI_FTYPE_V8DF_INT_UQI:
38832 case QI_FTYPE_V4DF_INT_UQI:
38833 case QI_FTYPE_V2DF_INT_UQI:
38834 case HI_FTYPE_V16SF_INT_UHI:
38835 case QI_FTYPE_V8SF_INT_UQI:
38836 case QI_FTYPE_V4SF_INT_UQI:
38837 nargs = 3;
38838 mask_pos = 1;
38839 nargs_constant = 1;
38840 break;
38841 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
38842 nargs = 5;
38843 rmode = V4DImode;
38844 mask_pos = 2;
38845 nargs_constant = 1;
38846 break;
38847 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
38848 nargs = 5;
38849 rmode = V2DImode;
38850 mask_pos = 2;
38851 nargs_constant = 1;
38852 break;
38853 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
38854 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
38855 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
38856 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
38857 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
38858 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
38859 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
38860 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
38861 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
38862 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
38863 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
38864 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
38865 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
38866 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
38867 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
38868 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
38869 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
38870 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
38871 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
38872 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
38873 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
38874 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
38875 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
38876 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
38877 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
38878 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
38879 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
38880 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
38881 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
38882 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
38883 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
38884 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
38885 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
38886 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
38887 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
38888 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
38889 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
38890 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
38891 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
38892 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
38893 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
38894 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
38895 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
38896 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
38897 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
38898 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
38899 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
38900 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
38901 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
38902 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
38903 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
38904 nargs = 4;
38905 break;
38906 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
38907 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
38908 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
38909 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
38910 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
38911 nargs = 4;
38912 nargs_constant = 1;
38913 break;
38914 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
38915 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
38916 case QI_FTYPE_V4DF_V4DF_INT_UQI:
38917 case QI_FTYPE_V8SF_V8SF_INT_UQI:
38918 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
38919 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
38920 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
38921 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
38922 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
38923 case USI_FTYPE_V32QI_V32QI_INT_USI:
38924 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
38925 case USI_FTYPE_V32HI_V32HI_INT_USI:
38926 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
38927 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
38928 nargs = 4;
38929 mask_pos = 1;
38930 nargs_constant = 1;
38931 break;
38932 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
38933 nargs = 4;
38934 nargs_constant = 2;
38935 break;
38936 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
38937 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
38938 nargs = 4;
38939 break;
38940 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
38941 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
38942 mask_pos = 1;
38943 nargs = 4;
38944 nargs_constant = 1;
38945 break;
38946 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
38947 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
38948 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
38949 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
38950 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
38951 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
38952 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
38953 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
38954 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
38955 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
38956 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
38957 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
38958 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
38959 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
38960 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
38961 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
38962 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
38963 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
38964 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
38965 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
38966 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
38967 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
38968 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
38969 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
38970 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
38971 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
38972 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
38973 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
38974 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
38975 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
38976 nargs = 4;
38977 mask_pos = 2;
38978 nargs_constant = 1;
38979 break;
38980 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
38981 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
38982 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
38983 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
38984 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
38985 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
38986 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
38987 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
38988 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
38989 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
38990 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
38991 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
38992 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
38993 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
38994 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
38995 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
38996 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
38997 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
38998 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
38999 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
39000 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
39001 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
39002 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
39003 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
39004 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
39005 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
39006 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
39007 nargs = 5;
39008 mask_pos = 2;
39009 nargs_constant = 1;
39010 break;
39011 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
39012 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
39013 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
39014 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
39015 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
39016 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
39017 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
39018 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
39019 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
39020 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
39021 nargs = 5;
39023 mask_pos = 1;
39024 nargs_constant = 1;
39025 break;
39026
39027 default:
39028 gcc_unreachable ();
39029 }
39030
39031 gcc_assert (nargs <= ARRAY_SIZE (args));
39032
39033 if (comparison != UNKNOWN)
39034 {
39035 gcc_assert (nargs == 2);
39036 return ix86_expand_sse_compare (d, exp, target, swap);
39037 }
39038
39039 if (rmode == VOIDmode || rmode == tmode)
39040 {
39041 if (optimize
39042 || target == 0
39043 || GET_MODE (target) != tmode
39044 || !insn_p->operand[0].predicate (target, tmode))
39045 target = gen_reg_rtx (tmode);
39046 real_target = target;
39047 }
39048 else
39049 {
39050 real_target = gen_reg_rtx (tmode);
39051 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
39052 }
39053
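  /* Expand each argument and force it into a form the corresponding
     operand predicate accepts: shift counts are taken as SImode
     subregs, trailing immediates are verified, and other operands are
     copied into registers when the predicate (or the one-memory-operand
     limit) requires it.  */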
39054 for (i = 0; i < nargs; i++)
39055 {
39056 tree arg = CALL_EXPR_ARG (exp, i);
39057 rtx op = expand_normal (arg);
39058 machine_mode mode = insn_p->operand[i + 1].mode;
39059 bool match = insn_p->operand[i + 1].predicate (op, mode);
39060
39061 if (last_arg_count && (i + 1) == nargs)
39062 {
39063 /* SIMD shift insns take either an 8-bit immediate or a
39064    register as the count, but the builtin functions take an int as
39065    the count.  If the count doesn't match, put it in a register.  */
39066 if (!match)
39067 {
39068 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
39069 if (!insn_p->operand[i + 1].predicate (op, mode))
39070 op = copy_to_reg (op);
39071 }
39072 }
39073 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
39074          || (!mask_pos && (nargs - i) <= nargs_constant))
39075 {
39076 if (!match)
39077 switch (icode)
39078 {
39079 case CODE_FOR_avx_vinsertf128v4di:
39080 case CODE_FOR_avx_vextractf128v4di:
39081 error ("the last argument must be a 1-bit immediate");
39082 return const0_rtx;
39083
39084 case CODE_FOR_avx512f_cmpv8di3_mask:
39085 case CODE_FOR_avx512f_cmpv16si3_mask:
39086 case CODE_FOR_avx512f_ucmpv8di3_mask:
39087 case CODE_FOR_avx512f_ucmpv16si3_mask:
39088 case CODE_FOR_avx512vl_cmpv4di3_mask:
39089 case CODE_FOR_avx512vl_cmpv8si3_mask:
39090 case CODE_FOR_avx512vl_ucmpv4di3_mask:
39091 case CODE_FOR_avx512vl_ucmpv8si3_mask:
39092 case CODE_FOR_avx512vl_cmpv2di3_mask:
39093 case CODE_FOR_avx512vl_cmpv4si3_mask:
39094 case CODE_FOR_avx512vl_ucmpv2di3_mask:
39095 case CODE_FOR_avx512vl_ucmpv4si3_mask:
39096 error ("the last argument must be a 3-bit immediate");
39097 return const0_rtx;
39098
39099 case CODE_FOR_sse4_1_roundsd:
39100 case CODE_FOR_sse4_1_roundss:
39101
39102 case CODE_FOR_sse4_1_roundpd:
39103 case CODE_FOR_sse4_1_roundps:
39104 case CODE_FOR_avx_roundpd256:
39105 case CODE_FOR_avx_roundps256:
39106
39107 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
39108 case CODE_FOR_sse4_1_roundps_sfix:
39109 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
39110 case CODE_FOR_avx_roundps_sfix256:
39111
39112 case CODE_FOR_sse4_1_blendps:
39113 case CODE_FOR_avx_blendpd256:
39114 case CODE_FOR_avx_vpermilv4df:
39115 case CODE_FOR_avx_vpermilv4df_mask:
39116 case CODE_FOR_avx512f_getmantv8df_mask:
39117 case CODE_FOR_avx512f_getmantv16sf_mask:
39118 case CODE_FOR_avx512vl_getmantv8sf_mask:
39119 case CODE_FOR_avx512vl_getmantv4df_mask:
39120 case CODE_FOR_avx512vl_getmantv4sf_mask:
39121 case CODE_FOR_avx512vl_getmantv2df_mask:
39122 case CODE_FOR_avx512dq_rangepv8df_mask_round:
39123 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
39124 case CODE_FOR_avx512dq_rangepv4df_mask:
39125 case CODE_FOR_avx512dq_rangepv8sf_mask:
39126 case CODE_FOR_avx512dq_rangepv2df_mask:
39127 case CODE_FOR_avx512dq_rangepv4sf_mask:
39128 case CODE_FOR_avx_shufpd256_mask:
39129 error ("the last argument must be a 4-bit immediate");
39130 return const0_rtx;
39131
39132 case CODE_FOR_sha1rnds4:
39133 case CODE_FOR_sse4_1_blendpd:
39134 case CODE_FOR_avx_vpermilv2df:
39135 case CODE_FOR_avx_vpermilv2df_mask:
39136 case CODE_FOR_xop_vpermil2v2df3:
39137 case CODE_FOR_xop_vpermil2v4sf3:
39138 case CODE_FOR_xop_vpermil2v4df3:
39139 case CODE_FOR_xop_vpermil2v8sf3:
39140 case CODE_FOR_avx512f_vinsertf32x4_mask:
39141 case CODE_FOR_avx512f_vinserti32x4_mask:
39142 case CODE_FOR_avx512f_vextractf32x4_mask:
39143 case CODE_FOR_avx512f_vextracti32x4_mask:
39144 case CODE_FOR_sse2_shufpd:
39145 case CODE_FOR_sse2_shufpd_mask:
39146 case CODE_FOR_avx512dq_shuf_f64x2_mask:
39147 case CODE_FOR_avx512dq_shuf_i64x2_mask:
39148 case CODE_FOR_avx512vl_shuf_i32x4_mask:
39149 case CODE_FOR_avx512vl_shuf_f32x4_mask:
39150 error ("the last argument must be a 2-bit immediate");
39151 return const0_rtx;
39152
39153 case CODE_FOR_avx_vextractf128v4df:
39154 case CODE_FOR_avx_vextractf128v8sf:
39155 case CODE_FOR_avx_vextractf128v8si:
39156 case CODE_FOR_avx_vinsertf128v4df:
39157 case CODE_FOR_avx_vinsertf128v8sf:
39158 case CODE_FOR_avx_vinsertf128v8si:
39159 case CODE_FOR_avx512f_vinsertf64x4_mask:
39160 case CODE_FOR_avx512f_vinserti64x4_mask:
39161 case CODE_FOR_avx512f_vextractf64x4_mask:
39162 case CODE_FOR_avx512f_vextracti64x4_mask:
39163 case CODE_FOR_avx512dq_vinsertf32x8_mask:
39164 case CODE_FOR_avx512dq_vinserti32x8_mask:
39165 case CODE_FOR_avx512vl_vinsertv4df:
39166 case CODE_FOR_avx512vl_vinsertv4di:
39167 case CODE_FOR_avx512vl_vinsertv8sf:
39168 case CODE_FOR_avx512vl_vinsertv8si:
39169 error ("the last argument must be a 1-bit immediate");
39170 return const0_rtx;
39171
39172 case CODE_FOR_avx_vmcmpv2df3:
39173 case CODE_FOR_avx_vmcmpv4sf3:
39174 case CODE_FOR_avx_cmpv2df3:
39175 case CODE_FOR_avx_cmpv4sf3:
39176 case CODE_FOR_avx_cmpv4df3:
39177 case CODE_FOR_avx_cmpv8sf3:
39178 case CODE_FOR_avx512f_cmpv8df3_mask:
39179 case CODE_FOR_avx512f_cmpv16sf3_mask:
39180 case CODE_FOR_avx512f_vmcmpv2df3_mask:
39181 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
39182 error ("the last argument must be a 5-bit immediate");
39183 return const0_rtx;
39184
39185 default:
39186 switch (nargs_constant)
39187 {
39188 case 2:
39189 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
39190     || (!mask_pos && (nargs - i) == nargs_constant))
39191 {
39192 error ("the next to last argument must be an 8-bit immediate");
39193 break;
39194 }
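              /* FALLTHRU: otherwise the offending operand is the last
                 argument, so report that instead.  */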
39195 case 1:
39196 error ("the last argument must be an 8-bit immediate");
39197 break;
39198 default:
39199 gcc_unreachable ();
39200 }
39201 return const0_rtx;
39202 }
39203 }
39204 else
39205 {
39206 if (VECTOR_MODE_P (mode))
39207 op = safe_vector_operand (op, mode);
39208
39209 /* If we aren't optimizing, only allow one memory operand to
39210 be generated. */
39211 if (memory_operand (op, mode))
39212 num_memory++;
39213
39214 op = fixup_modeless_constant (op, mode);
39215
39216 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39217 {
39218 if (optimize || !match || num_memory > 1)
39219 op = copy_to_mode_reg (mode, op);
39220 }
39221 else
39222 {
39223 op = copy_to_reg (op);
39224 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39225 }
39226 }
39227
39228 args[i].op = op;
39229 args[i].mode = mode;
39230 }
39231
39232 switch (nargs)
39233 {
39234 case 1:
39235 pat = GEN_FCN (icode) (real_target, args[0].op);
39236 break;
39237 case 2:
39238 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
39239 break;
39240 case 3:
39241 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39242 args[2].op);
39243 break;
39244 case 4:
39245 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39246 args[2].op, args[3].op);
39247 break;
39248 case 5:
39249 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39250 args[2].op, args[3].op, args[4].op);
      break;
39251 case 6:
39252 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39253 args[2].op, args[3].op, args[4].op,
39254 args[5].op);
39255 break;
39256 default:
39257 gcc_unreachable ();
39258 }
39259
39260 if (! pat)
39261 return 0;
39262
39263 emit_insn (pat);
39264 return target;
39265 }
39266
39267 /* Transform a pattern of the following layout:
39268    (parallel [
39269      (set (A B))
39270      (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
39271    ])
39272    into:
39273    (set (A B))
39274
39275 Or:
39276 (parallel [ A B
39277 ...
39278 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
39279 ...
39280 ])
39281 into:
39282 (parallel [ A B ... ]) */
39283
39284 static rtx
39285 ix86_erase_embedded_rounding (rtx pat)
39286 {
39287 if (GET_CODE (pat) == INSN)
39288 pat = PATTERN (pat);
39289
39290 gcc_assert (GET_CODE (pat) == PARALLEL);
39291
39292 if (XVECLEN (pat, 0) == 2)
39293 {
39294 rtx p0 = XVECEXP (pat, 0, 0);
39295 rtx p1 = XVECEXP (pat, 0, 1);
39296
39297 gcc_assert (GET_CODE (p0) == SET
39298 && GET_CODE (p1) == UNSPEC
39299 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
39300
39301 return p0;
39302 }
39303 else
39304 {
39305 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
39306 int i = 0;
39307 int j = 0;
39308
39309 for (; i < XVECLEN (pat, 0); ++i)
39310 {
39311 rtx elem = XVECEXP (pat, 0, i);
39312 if (GET_CODE (elem) != UNSPEC
39313 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
39314 res [j++] = elem;
39315 }
39316
39317 /* No more than one occurrence was removed. */
39318 gcc_assert (j >= XVECLEN (pat, 0) - 1);
39319
39320 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
39321 }
39322 }
39323
39324 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
39325 with rounding. */
39326 static rtx
39327 ix86_expand_sse_comi_round (const struct builtin_description *d,
39328 tree exp, rtx target)
39329 {
39330 rtx pat, set_dst;
39331 tree arg0 = CALL_EXPR_ARG (exp, 0);
39332 tree arg1 = CALL_EXPR_ARG (exp, 1);
39333 tree arg2 = CALL_EXPR_ARG (exp, 2);
39334 tree arg3 = CALL_EXPR_ARG (exp, 3);
39335 rtx op0 = expand_normal (arg0);
39336 rtx op1 = expand_normal (arg1);
39337 rtx op2 = expand_normal (arg2);
39338 rtx op3 = expand_normal (arg3);
39339 enum insn_code icode = d->icode;
39340 const struct insn_data_d *insn_p = &insn_data[icode];
39341 machine_mode mode0 = insn_p->operand[0].mode;
39342 machine_mode mode1 = insn_p->operand[1].mode;
39343 enum rtx_code comparison = UNEQ;
39344 bool need_ucomi = false;
39345
39346 /* See avxintrin.h for values. */
39347 enum rtx_code comi_comparisons[32] =
39348 {
39349 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
39350 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
39351 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
39352 };
39353 bool need_ucomi_values[32] =
39354 {
39355 true, false, false, true, true, false, false, true,
39356 true, false, false, true, true, false, false, true,
39357 false, true, true, false, false, true, true, false,
39358 false, true, true, false, false, true, true, false
39359 };
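  /* Both tables are indexed by the _CMP_* predicate constants 0..31
     used by the intrinsics; NEED_UCOMI selects the quiet
     (non-signalling) UCOMI form for the predicates that must not raise
     exceptions on quiet NaN operands.  */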
39360
39361 if (!CONST_INT_P (op2))
39362 {
39363 error ("the third argument must be a comparison constant");
39364 return const0_rtx;
39365 }
39366 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
39367 {
39368 error ("incorrect comparison mode");
39369 return const0_rtx;
39370 }
39371
39372 if (!insn_p->operand[2].predicate (op3, SImode))
39373 {
39374 error ("incorrect rounding operand");
39375 return const0_rtx;
39376 }
39377
39378 comparison = comi_comparisons[INTVAL (op2)];
39379 need_ucomi = need_ucomi_values[INTVAL (op2)];
39380
39381 if (VECTOR_MODE_P (mode0))
39382 op0 = safe_vector_operand (op0, mode0);
39383 if (VECTOR_MODE_P (mode1))
39384 op1 = safe_vector_operand (op1, mode1);
39385
39386 target = gen_reg_rtx (SImode);
39387 emit_move_insn (target, const0_rtx);
39388 target = gen_rtx_SUBREG (QImode, target, 0);
39389
39390 if ((optimize && !register_operand (op0, mode0))
39391 || !insn_p->operand[0].predicate (op0, mode0))
39392 op0 = copy_to_mode_reg (mode0, op0);
39393 if ((optimize && !register_operand (op1, mode1))
39394 || !insn_p->operand[1].predicate (op1, mode1))
39395 op1 = copy_to_mode_reg (mode1, op1);
39396
39397 if (need_ucomi)
39398 icode = icode == CODE_FOR_sse_comi_round
39399 ? CODE_FOR_sse_ucomi_round
39400 : CODE_FOR_sse2_ucomi_round;
39401
39402 pat = GEN_FCN (icode) (op0, op1, op3);
39403 if (! pat)
39404 return 0;
39405
39406 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
39407 if (INTVAL (op3) == NO_ROUND)
39408 {
39409 pat = ix86_erase_embedded_rounding (pat);
39410 if (! pat)
39411 return 0;
39412
39413 set_dst = SET_DEST (pat);
39414 }
39415 else
39416 {
39417 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
39418 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
39419 }
39420
39421 emit_insn (pat);
39422 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39423 gen_rtx_fmt_ee (comparison, QImode,
39424 set_dst,
39425 const0_rtx)));
39426
39427 return SUBREG_REG (target);
39428 }
39429
39430 static rtx
39431 ix86_expand_round_builtin (const struct builtin_description *d,
39432 tree exp, rtx target)
39433 {
39434 rtx pat;
39435 unsigned int i, nargs;
39436 struct
39437 {
39438 rtx op;
39439 machine_mode mode;
39440 } args[6];
39441 enum insn_code icode = d->icode;
39442 const struct insn_data_d *insn_p = &insn_data[icode];
39443 machine_mode tmode = insn_p->operand[0].mode;
39444 unsigned int nargs_constant = 0;
39445 unsigned int redundant_embed_rnd = 0;
39446
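  /* Every *_round builtin takes the rounding/SAE control as its last
     argument; NARGS_CONSTANT, when nonzero, places the other required
     immediate operand at index NARGS - NARGS_CONSTANT.  */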
39447 switch ((enum ix86_builtin_func_type) d->flag)
39448 {
39449 case UINT64_FTYPE_V2DF_INT:
39450 case UINT64_FTYPE_V4SF_INT:
39451 case UINT_FTYPE_V2DF_INT:
39452 case UINT_FTYPE_V4SF_INT:
39453 case INT64_FTYPE_V2DF_INT:
39454 case INT64_FTYPE_V4SF_INT:
39455 case INT_FTYPE_V2DF_INT:
39456 case INT_FTYPE_V4SF_INT:
39457 nargs = 2;
39458 break;
39459 case V4SF_FTYPE_V4SF_UINT_INT:
39460 case V4SF_FTYPE_V4SF_UINT64_INT:
39461 case V2DF_FTYPE_V2DF_UINT64_INT:
39462 case V4SF_FTYPE_V4SF_INT_INT:
39463 case V4SF_FTYPE_V4SF_INT64_INT:
39464 case V2DF_FTYPE_V2DF_INT64_INT:
39465 case V4SF_FTYPE_V4SF_V4SF_INT:
39466 case V2DF_FTYPE_V2DF_V2DF_INT:
39467 case V4SF_FTYPE_V4SF_V2DF_INT:
39468 case V2DF_FTYPE_V2DF_V4SF_INT:
39469 nargs = 3;
39470 break;
39471 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
39472 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
39473 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
39474 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
39475 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
39476 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
39477 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
39478 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
39479 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
39480 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
39481 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
39482 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
39483 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
39484 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
39485 nargs = 4;
39486 break;
39487 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
39488 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
39489 nargs_constant = 2;
39490 nargs = 4;
39491 break;
39492 case INT_FTYPE_V4SF_V4SF_INT_INT:
39493 case INT_FTYPE_V2DF_V2DF_INT_INT:
39494 return ix86_expand_sse_comi_round (d, exp, target);
39495 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
39496 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
39497 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
39498 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
39499 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
39500 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
39501 nargs = 5;
39502 break;
39503 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
39504 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
39505 nargs_constant = 4;
39506 nargs = 5;
39507 break;
39508 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
39509 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
39510 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
39511 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
39512 nargs_constant = 3;
39513 nargs = 5;
39514 break;
39515 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
39516 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
39517 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
39518 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
39519 nargs = 6;
39520 nargs_constant = 4;
39521 break;
39522 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
39523 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
39524 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
39525 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
39526 nargs = 6;
39527 nargs_constant = 3;
39528 break;
39529 default:
39530 gcc_unreachable ();
39531 }
39532 gcc_assert (nargs <= ARRAY_SIZE (args));
39533
39534 if (optimize
39535 || target == 0
39536 || GET_MODE (target) != tmode
39537 || !insn_p->operand[0].predicate (target, tmode))
39538 target = gen_reg_rtx (tmode);
39539
39540 for (i = 0; i < nargs; i++)
39541 {
39542 tree arg = CALL_EXPR_ARG (exp, i);
39543 rtx op = expand_normal (arg);
39544 machine_mode mode = insn_p->operand[i + 1].mode;
39545 bool match = insn_p->operand[i + 1].predicate (op, mode);
39546
39547 if (i == nargs - nargs_constant)
39548 {
39549 if (!match)
39550 {
39551 switch (icode)
39552 {
39553 case CODE_FOR_avx512f_getmantv8df_mask_round:
39554 case CODE_FOR_avx512f_getmantv16sf_mask_round:
39555 case CODE_FOR_avx512f_vgetmantv2df_round:
39556 case CODE_FOR_avx512f_vgetmantv4sf_round:
39557 error ("the immediate argument must be a 4-bit immediate");
39558 return const0_rtx;
39559 case CODE_FOR_avx512f_cmpv8df3_mask_round:
39560 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
39561 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
39562 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
39563 error ("the immediate argument must be a 5-bit immediate");
39564 return const0_rtx;
39565 default:
39566 error ("the immediate argument must be an 8-bit immediate");
39567 return const0_rtx;
39568 }
39569 }
39570 }
39571 else if (i == nargs - 1)
39572 {
39573 if (!insn_p->operand[nargs].predicate (op, SImode))
39574 {
39575 error ("incorrect rounding operand");
39576 return const0_rtx;
39577 }
39578
39579 /* If there is no rounding, use the normal version of the pattern. */
39580 if (INTVAL (op) == NO_ROUND)
39581 redundant_embed_rnd = 1;
39582 }
39583 else
39584 {
39585 if (VECTOR_MODE_P (mode))
39586 op = safe_vector_operand (op, mode);
39587
39588 op = fixup_modeless_constant (op, mode);
39589
39590 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39591 {
39592 if (optimize || !match)
39593 op = copy_to_mode_reg (mode, op);
39594 }
39595 else
39596 {
39597 op = copy_to_reg (op);
39598 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39599 }
39600 }
39601
39602 args[i].op = op;
39603 args[i].mode = mode;
39604 }
39605
39606 switch (nargs)
39607 {
39608 case 1:
39609 pat = GEN_FCN (icode) (target, args[0].op);
39610 break;
39611 case 2:
39612 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
39613 break;
39614 case 3:
39615 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39616 args[2].op);
39617 break;
39618 case 4:
39619 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39620 args[2].op, args[3].op);
39621 break;
39622 case 5:
39623 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39624 args[2].op, args[3].op, args[4].op);
      break;
39625 case 6:
39626 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39627 args[2].op, args[3].op, args[4].op,
39628 args[5].op);
39629 break;
39630 default:
39631 gcc_unreachable ();
39632 }
39633
39634 if (!pat)
39635 return 0;
39636
39637 if (redundant_embed_rnd)
39638 pat = ix86_erase_embedded_rounding (pat);
39639
39640 emit_insn (pat);
39641 return target;
39642 }
39643
39644 /* Subroutine of ix86_expand_builtin to take care of special insns
39645 with variable number of operands. */
39646
39647 static rtx
39648 ix86_expand_special_args_builtin (const struct builtin_description *d,
39649 tree exp, rtx target)
39650 {
39651 tree arg;
39652 rtx pat, op;
39653 unsigned int i, nargs, arg_adjust, memory;
39654 bool aligned_mem = false;
39655 struct
39656 {
39657 rtx op;
39658 machine_mode mode;
39659 } args[3];
39660 enum insn_code icode = d->icode;
39661 bool last_arg_constant = false;
39662 const struct insn_data_d *insn_p = &insn_data[icode];
39663 machine_mode tmode = insn_p->operand[0].mode;
39664 enum { load, store } klass;
39665
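  /* KLASS says whether the builtin loads from or stores to memory.  For
     stores a nonzero MEMORY means the destination built from argument 0
     is a memory reference; for loads MEMORY is the index of the memory
     argument, or ARRAY_SIZE (args) if none of the arguments is one.  */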
39666 switch ((enum ix86_builtin_func_type) d->flag)
39667 {
39668 case VOID_FTYPE_VOID:
39669 emit_insn (GEN_FCN (icode) (target));
39670 return 0;
39671 case VOID_FTYPE_UINT64:
39672 case VOID_FTYPE_UNSIGNED:
39673 nargs = 0;
39674 klass = store;
39675 memory = 0;
39676 break;
39677
39678 case INT_FTYPE_VOID:
39679 case USHORT_FTYPE_VOID:
39680 case UINT64_FTYPE_VOID:
39681 case UNSIGNED_FTYPE_VOID:
39682 nargs = 0;
39683 klass = load;
39684 memory = 0;
39685 break;
39686 case UINT64_FTYPE_PUNSIGNED:
39687 case V2DI_FTYPE_PV2DI:
39688 case V4DI_FTYPE_PV4DI:
39689 case V32QI_FTYPE_PCCHAR:
39690 case V16QI_FTYPE_PCCHAR:
39691 case V8SF_FTYPE_PCV4SF:
39692 case V8SF_FTYPE_PCFLOAT:
39693 case V4SF_FTYPE_PCFLOAT:
39694 case V4DF_FTYPE_PCV2DF:
39695 case V4DF_FTYPE_PCDOUBLE:
39696 case V2DF_FTYPE_PCDOUBLE:
39697 case VOID_FTYPE_PVOID:
39698 case V8DI_FTYPE_PV8DI:
39699 nargs = 1;
39700 klass = load;
39701 memory = 0;
39702 switch (icode)
39703 {
39704 case CODE_FOR_sse4_1_movntdqa:
39705 case CODE_FOR_avx2_movntdqa:
39706 case CODE_FOR_avx512f_movntdqa:
39707 aligned_mem = true;
39708 break;
39709 default:
39710 break;
39711 }
39712 break;
39713 case VOID_FTYPE_PV2SF_V4SF:
39714 case VOID_FTYPE_PV8DI_V8DI:
39715 case VOID_FTYPE_PV4DI_V4DI:
39716 case VOID_FTYPE_PV2DI_V2DI:
39717 case VOID_FTYPE_PCHAR_V32QI:
39718 case VOID_FTYPE_PCHAR_V16QI:
39719 case VOID_FTYPE_PFLOAT_V16SF:
39720 case VOID_FTYPE_PFLOAT_V8SF:
39721 case VOID_FTYPE_PFLOAT_V4SF:
39722 case VOID_FTYPE_PDOUBLE_V8DF:
39723 case VOID_FTYPE_PDOUBLE_V4DF:
39724 case VOID_FTYPE_PDOUBLE_V2DF:
39725 case VOID_FTYPE_PLONGLONG_LONGLONG:
39726 case VOID_FTYPE_PULONGLONG_ULONGLONG:
39727 case VOID_FTYPE_PINT_INT:
39728 nargs = 1;
39729 klass = store;
39730 /* Reserve memory operand for target. */
39731 memory = ARRAY_SIZE (args);
39732 switch (icode)
39733 {
39734 /* These builtins and instructions require the memory
39735 to be properly aligned. */
39736 case CODE_FOR_avx_movntv4di:
39737 case CODE_FOR_sse2_movntv2di:
39738 case CODE_FOR_avx_movntv8sf:
39739 case CODE_FOR_sse_movntv4sf:
39740 case CODE_FOR_sse4a_vmmovntv4sf:
39741 case CODE_FOR_avx_movntv4df:
39742 case CODE_FOR_sse2_movntv2df:
39743 case CODE_FOR_sse4a_vmmovntv2df:
39744 case CODE_FOR_sse2_movntidi:
39745 case CODE_FOR_sse_movntq:
39746 case CODE_FOR_sse2_movntisi:
39747 case CODE_FOR_avx512f_movntv16sf:
39748 case CODE_FOR_avx512f_movntv8df:
39749 case CODE_FOR_avx512f_movntv8di:
39750 aligned_mem = true;
39751 break;
39752 default:
39753 break;
39754 }
39755 break;
39756 case V4SF_FTYPE_V4SF_PCV2SF:
39757 case V2DF_FTYPE_V2DF_PCDOUBLE:
39758 nargs = 2;
39759 klass = load;
39760 memory = 1;
39761 break;
39762 case V8SF_FTYPE_PCV8SF_V8SI:
39763 case V4DF_FTYPE_PCV4DF_V4DI:
39764 case V4SF_FTYPE_PCV4SF_V4SI:
39765 case V2DF_FTYPE_PCV2DF_V2DI:
39766 case V8SI_FTYPE_PCV8SI_V8SI:
39767 case V4DI_FTYPE_PCV4DI_V4DI:
39768 case V4SI_FTYPE_PCV4SI_V4SI:
39769 case V2DI_FTYPE_PCV2DI_V2DI:
39770 nargs = 2;
39771 klass = load;
39772 memory = 0;
39773 break;
39774 case VOID_FTYPE_PV8DF_V8DF_UQI:
39775 case VOID_FTYPE_PV16SF_V16SF_UHI:
39776 case VOID_FTYPE_PV8DI_V8DI_UQI:
39777 case VOID_FTYPE_PV4DI_V4DI_UQI:
39778 case VOID_FTYPE_PV2DI_V2DI_UQI:
39779 case VOID_FTYPE_PV16SI_V16SI_UHI:
39780 case VOID_FTYPE_PV8SI_V8SI_UQI:
39781 case VOID_FTYPE_PV4SI_V4SI_UQI:
39782 switch (icode)
39783 {
39784 /* These builtins and instructions require the memory
39785 to be properly aligned. */
39786 case CODE_FOR_avx512f_storev16sf_mask:
39787 case CODE_FOR_avx512f_storev16si_mask:
39788 case CODE_FOR_avx512f_storev8df_mask:
39789 case CODE_FOR_avx512f_storev8di_mask:
39790 case CODE_FOR_avx512vl_storev8sf_mask:
39791 case CODE_FOR_avx512vl_storev8si_mask:
39792 case CODE_FOR_avx512vl_storev4df_mask:
39793 case CODE_FOR_avx512vl_storev4di_mask:
39794 case CODE_FOR_avx512vl_storev4sf_mask:
39795 case CODE_FOR_avx512vl_storev4si_mask:
39796 case CODE_FOR_avx512vl_storev2df_mask:
39797 case CODE_FOR_avx512vl_storev2di_mask:
39798 aligned_mem = true;
39799 break;
39800 default:
39801 break;
39802 }
39803 /* FALLTHRU */
39804 case VOID_FTYPE_PV8SF_V8SI_V8SF:
39805 case VOID_FTYPE_PV4DF_V4DI_V4DF:
39806 case VOID_FTYPE_PV4SF_V4SI_V4SF:
39807 case VOID_FTYPE_PV2DF_V2DI_V2DF:
39808 case VOID_FTYPE_PV8SI_V8SI_V8SI:
39809 case VOID_FTYPE_PV4DI_V4DI_V4DI:
39810 case VOID_FTYPE_PV4SI_V4SI_V4SI:
39811 case VOID_FTYPE_PV2DI_V2DI_V2DI:
39812 case VOID_FTYPE_PV8SI_V8DI_UQI:
39813 case VOID_FTYPE_PV8HI_V8DI_UQI:
39814 case VOID_FTYPE_PV16HI_V16SI_UHI:
39815 case VOID_FTYPE_PV16QI_V8DI_UQI:
39816 case VOID_FTYPE_PV16QI_V16SI_UHI:
39817 case VOID_FTYPE_PV4SI_V4DI_UQI:
39818 case VOID_FTYPE_PV4SI_V2DI_UQI:
39819 case VOID_FTYPE_PV8HI_V4DI_UQI:
39820 case VOID_FTYPE_PV8HI_V2DI_UQI:
39821 case VOID_FTYPE_PV8HI_V8SI_UQI:
39822 case VOID_FTYPE_PV8HI_V4SI_UQI:
39823 case VOID_FTYPE_PV16QI_V4DI_UQI:
39824 case VOID_FTYPE_PV16QI_V2DI_UQI:
39825 case VOID_FTYPE_PV16QI_V8SI_UQI:
39826 case VOID_FTYPE_PV16QI_V4SI_UQI:
39827 case VOID_FTYPE_PV8HI_V8HI_UQI:
39828 case VOID_FTYPE_PV16HI_V16HI_UHI:
39829 case VOID_FTYPE_PV32HI_V32HI_USI:
39830 case VOID_FTYPE_PV16QI_V16QI_UHI:
39831 case VOID_FTYPE_PV32QI_V32QI_USI:
39832 case VOID_FTYPE_PV64QI_V64QI_UDI:
39833 case VOID_FTYPE_PV4DF_V4DF_UQI:
39834 case VOID_FTYPE_PV2DF_V2DF_UQI:
39835 case VOID_FTYPE_PV8SF_V8SF_UQI:
39836 case VOID_FTYPE_PV4SF_V4SF_UQI:
39837 nargs = 2;
39838 klass = store;
39839 /* Reserve memory operand for target. */
39840 memory = ARRAY_SIZE (args);
39841 break;
39842 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
39843 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
39844 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
39845 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
39846 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
39847 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
39848 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
39849 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
39850 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
39851 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
39852 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
39853 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
39854 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
39855 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
39856 case V32HI_FTYPE_PCV32HI_V32HI_USI:
39857 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
39858 case V32QI_FTYPE_PCV32QI_V32QI_USI:
39859 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
39860 nargs = 3;
39861 klass = load;
39862 memory = 0;
39863 switch (icode)
39864 {
39865 /* These builtins and instructions require the memory
39866 to be properly aligned. */
39867 case CODE_FOR_avx512f_loadv16sf_mask:
39868 case CODE_FOR_avx512f_loadv16si_mask:
39869 case CODE_FOR_avx512f_loadv8df_mask:
39870 case CODE_FOR_avx512f_loadv8di_mask:
39871 case CODE_FOR_avx512vl_loadv8sf_mask:
39872 case CODE_FOR_avx512vl_loadv8si_mask:
39873 case CODE_FOR_avx512vl_loadv4df_mask:
39874 case CODE_FOR_avx512vl_loadv4di_mask:
39875 case CODE_FOR_avx512vl_loadv4sf_mask:
39876 case CODE_FOR_avx512vl_loadv4si_mask:
39877 case CODE_FOR_avx512vl_loadv2df_mask:
39878 case CODE_FOR_avx512vl_loadv2di_mask:
39879 case CODE_FOR_avx512bw_loadv64qi_mask:
39880 case CODE_FOR_avx512vl_loadv32qi_mask:
39881 case CODE_FOR_avx512vl_loadv16qi_mask:
39882 case CODE_FOR_avx512bw_loadv32hi_mask:
39883 case CODE_FOR_avx512vl_loadv16hi_mask:
39884 case CODE_FOR_avx512vl_loadv8hi_mask:
39885 aligned_mem = true;
39886 break;
39887 default:
39888 break;
39889 }
39890 break;
39891 case VOID_FTYPE_UINT_UINT_UINT:
39892 case VOID_FTYPE_UINT64_UINT_UINT:
39893 case UCHAR_FTYPE_UINT_UINT_UINT:
39894 case UCHAR_FTYPE_UINT64_UINT_UINT:
39895 nargs = 3;
39896 klass = load;
39897 memory = ARRAY_SIZE (args);
39898 last_arg_constant = true;
39899 break;
39900 default:
39901 gcc_unreachable ();
39902 }
39903
39904 gcc_assert (nargs <= ARRAY_SIZE (args));
39905
39906 if (klass == store)
39907 {
39908 arg = CALL_EXPR_ARG (exp, 0);
39909 op = expand_normal (arg);
39910 gcc_assert (target == 0);
39911 if (memory)
39912 {
39913 op = ix86_zero_extend_to_Pmode (op);
39914 target = gen_rtx_MEM (tmode, op);
39915 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
39916 on it. Try to improve it using get_pointer_alignment,
39917 and if the special builtin is one that requires strict
39918 mode alignment, also from its GET_MODE_ALIGNMENT.
39919 Failure to do so could lead to ix86_legitimate_combined_insn
39920 rejecting all changes to such insns. */
39921 unsigned int align = get_pointer_alignment (arg);
39922 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
39923 align = GET_MODE_ALIGNMENT (tmode);
39924 if (MEM_ALIGN (target) < align)
39925 set_mem_align (target, align);
39926 }
39927 else
39928 target = force_reg (tmode, op);
39929 arg_adjust = 1;
39930 }
39931 else
39932 {
39933 arg_adjust = 0;
39934 if (optimize
39935 || target == 0
39936 || !register_operand (target, tmode)
39937 || GET_MODE (target) != tmode)
39938 target = gen_reg_rtx (tmode);
39939 }
39940
39941 for (i = 0; i < nargs; i++)
39942 {
39943 machine_mode mode = insn_p->operand[i + 1].mode;
39944 bool match;
39945
39946 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
39947 op = expand_normal (arg);
39948 match = insn_p->operand[i + 1].predicate (op, mode);
39949
39950 if (last_arg_constant && (i + 1) == nargs)
39951 {
39952 if (!match)
39953 {
39954 if (icode == CODE_FOR_lwp_lwpvalsi3
39955 || icode == CODE_FOR_lwp_lwpinssi3
39956 || icode == CODE_FOR_lwp_lwpvaldi3
39957 || icode == CODE_FOR_lwp_lwpinsdi3)
39958 error ("the last argument must be a 32-bit immediate");
39959 else
39960 error ("the last argument must be an 8-bit immediate");
39961 return const0_rtx;
39962 }
39963 }
39964 else
39965 {
39966 if (i == memory)
39967 {
39968 /* This must be the memory operand. */
39969 op = ix86_zero_extend_to_Pmode (op);
39970 op = gen_rtx_MEM (mode, op);
39971 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
39972 on it. Try to improve it using get_pointer_alignment,
39973 and if the special builtin is one that requires strict
39974 mode alignment, also from its GET_MODE_ALIGNMENT.
39975 Failure to do so could lead to ix86_legitimate_combined_insn
39976 rejecting all changes to such insns. */
39977 unsigned int align = get_pointer_alignment (arg);
39978 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
39979 align = GET_MODE_ALIGNMENT (mode);
39980 if (MEM_ALIGN (op) < align)
39981 set_mem_align (op, align);
39982 }
39983 else
39984 {
39985 /* This must be a register.  */
39986 if (VECTOR_MODE_P (mode))
39987 op = safe_vector_operand (op, mode);
39988
39989 op = fixup_modeless_constant (op, mode);
39990
39991 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39992 op = copy_to_mode_reg (mode, op);
39993 else
39994 {
39995 op = copy_to_reg (op);
39996 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39997 }
39998 }
39999 }
40000
40001 args[i].op = op;
40002 args[i].mode = mode;
40003 }
40004
40005 switch (nargs)
40006 {
40007 case 0:
40008 pat = GEN_FCN (icode) (target);
40009 break;
40010 case 1:
40011 pat = GEN_FCN (icode) (target, args[0].op);
40012 break;
40013 case 2:
40014 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
40015 break;
40016 case 3:
40017 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
40018 break;
40019 default:
40020 gcc_unreachable ();
40021 }
40022
40023 if (! pat)
40024 return 0;
40025 emit_insn (pat);
40026 return klass == store ? 0 : target;
40027 }
40028
40029 /* Return the integer constant in ARG. Constrain it to be in the range
40030 of the subparts of VEC_TYPE; issue an error if not. */
40031
40032 static int
40033 get_element_number (tree vec_type, tree arg)
40034 {
40035 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
40036
40037 if (!tree_fits_uhwi_p (arg)
40038 || (elt = tree_to_uhwi (arg), elt > max))
40039 {
40040 error ("selector must be an integer constant in the range 0..%wi", max);
40041 return 0;
40042 }
40043
40044 return elt;
40045 }
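/* Illustrative note (builtin name is an assumption, not from the original
   source): for a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so a call along
   the lines of  __builtin_ia32_vec_ext_v4sf (v, 5)  is diagnosed by the
   function above with "selector must be an integer constant in the range
   0..3", and element 0 is used instead.  */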
40046
40047 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40048 ix86_expand_vector_init. We DO have language-level syntax for this, in
40049 the form of (type){ init-list }. Except that since we can't place emms
40050 instructions from inside the compiler, we can't allow the use of MMX
40051 registers unless the user explicitly asks for it. So we do *not* define
40052 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
40053 we have builtins invoked by mmintrin.h that give us license to emit
40054 these sorts of instructions. */
40055
40056 static rtx
40057 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
40058 {
40059 machine_mode tmode = TYPE_MODE (type);
40060 machine_mode inner_mode = GET_MODE_INNER (tmode);
40061 int i, n_elt = GET_MODE_NUNITS (tmode);
40062 rtvec v = rtvec_alloc (n_elt);
40063
40064 gcc_assert (VECTOR_MODE_P (tmode));
40065 gcc_assert (call_expr_nargs (exp) == n_elt);
40066
40067 for (i = 0; i < n_elt; ++i)
40068 {
40069 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
40070 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
40071 }
40072
40073 if (!target || !register_operand (target, tmode))
40074 target = gen_reg_rtx (tmode);
40075
40076 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
40077 return target;
40078 }
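/* Illustrative sketch (intrinsic mapping is an assumption, not part of the
   original source): mmintrin.h wrappers such as _mm_set_pi32 expand to a
   __builtin_ia32_vec_init_v2si call that lands here; each scalar argument
   is expanded, narrowed to the element mode with gen_lowpart, and the
   resulting PARALLEL is handed to ix86_expand_vector_init.  */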
40079
40080 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40081 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
40082 had a language-level syntax for referencing vector elements. */
40083
40084 static rtx
40085 ix86_expand_vec_ext_builtin (tree exp, rtx target)
40086 {
40087 machine_mode tmode, mode0;
40088 tree arg0, arg1;
40089 int elt;
40090 rtx op0;
40091
40092 arg0 = CALL_EXPR_ARG (exp, 0);
40093 arg1 = CALL_EXPR_ARG (exp, 1);
40094
40095 op0 = expand_normal (arg0);
40096 elt = get_element_number (TREE_TYPE (arg0), arg1);
40097
40098 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40099 mode0 = TYPE_MODE (TREE_TYPE (arg0));
40100 gcc_assert (VECTOR_MODE_P (mode0));
40101
40102 op0 = force_reg (mode0, op0);
40103
40104 if (optimize || !target || !register_operand (target, tmode))
40105 target = gen_reg_rtx (tmode);
40106
40107 ix86_expand_vector_extract (true, target, op0, elt);
40108
40109 return target;
40110 }
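/* Usage note (intrinsic mapping is an assumption): an extraction such as
   _mm_cvtss_f32, i.e. __builtin_ia32_vec_ext_v4sf (x, 0), comes through
   here; the selector is validated by get_element_number and the element is
   read out with ix86_expand_vector_extract.  */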
40111
40112 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40113 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
40114 a language-level syntax for referencing vector elements. */
40115
40116 static rtx
40117 ix86_expand_vec_set_builtin (tree exp)
40118 {
40119 machine_mode tmode, mode1;
40120 tree arg0, arg1, arg2;
40121 int elt;
40122 rtx op0, op1, target;
40123
40124 arg0 = CALL_EXPR_ARG (exp, 0);
40125 arg1 = CALL_EXPR_ARG (exp, 1);
40126 arg2 = CALL_EXPR_ARG (exp, 2);
40127
40128 tmode = TYPE_MODE (TREE_TYPE (arg0));
40129 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40130 gcc_assert (VECTOR_MODE_P (tmode));
40131
40132 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
40133 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
40134 elt = get_element_number (TREE_TYPE (arg0), arg2);
40135
40136 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
40137 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
40138
40139 op0 = force_reg (tmode, op0);
40140 op1 = force_reg (mode1, op1);
40141
40142 /* OP0 is the source of these builtin functions and shouldn't be
40143 modified. Create a copy, use it and return it as target. */
40144 target = gen_reg_rtx (tmode);
40145 emit_move_insn (target, op0);
40146 ix86_expand_vector_set (true, target, op1, elt);
40147
40148 return target;
40149 }
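/* Usage note (builtin name is an assumption, not from the original
   source): because the expansion above copies OP0 into a fresh register
   before calling ix86_expand_vector_set, a call such as
     __builtin_ia32_vec_set_v4hi (v, 42, 1)
   leaves V unchanged and returns the modified copy.  */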
40150
40151 /* Emit conditional move of SRC to DST with condition
40152 OP1 CODE OP2. */
40153 static void
40154 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
40155 {
40156 rtx t;
40157
40158 if (TARGET_CMOVE)
40159 {
40160 t = ix86_expand_compare (code, op1, op2);
40161 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
40162 src, dst)));
40163 }
40164 else
40165 {
40166 rtx_code_label *nomove = gen_label_rtx ();
40167 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
40168 const0_rtx, GET_MODE (op1), 1, nomove);
40169 emit_move_insn (dst, src);
40170 emit_label (nomove);
40171 }
40172 }
40173
40174 /* Choose the unsigned max of DST and SRC and store it in DST. */
40175 static void
40176 ix86_emit_move_max (rtx dst, rtx src)
40177 {
40178 ix86_emit_cmove (dst, src, LTU, dst, src);
40179 }
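/* Rough C sketch of the two helpers above (illustrative, not part of the
   original source):

     ix86_emit_cmove:    if (op1 CODE op2) dst = src;
     ix86_emit_move_max: if (dst < src)    dst = src;   /+ unsigned +/

   i.e. ix86_emit_move_max computes an unsigned maximum, which is what the
   MPX bound computations below rely on.  */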
40180
40181 /* Expand an expression EXP that calls a built-in function,
40182 with result going to TARGET if that's convenient
40183 (and in mode MODE if that's convenient).
40184 SUBTARGET may be used as the target for computing one of EXP's operands.
40185 IGNORE is nonzero if the value is to be ignored. */
40186
40187 static rtx
40188 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
40189 machine_mode mode, int ignore)
40190 {
40191 const struct builtin_description *d;
40192 size_t i;
40193 enum insn_code icode;
40194 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
40195 tree arg0, arg1, arg2, arg3, arg4;
40196 rtx op0, op1, op2, op3, op4, pat, insn;
40197 machine_mode mode0, mode1, mode2, mode3, mode4;
40198 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
40199
40200 /* For CPU builtins that can be folded, fold first and expand the fold. */
40201 switch (fcode)
40202 {
40203 case IX86_BUILTIN_CPU_INIT:
40204 {
40205 /* Make it call __cpu_indicator_init in libgcc. */
40206 tree call_expr, fndecl, type;
40207 type = build_function_type_list (integer_type_node, NULL_TREE);
40208 fndecl = build_fn_decl ("__cpu_indicator_init", type);
40209 call_expr = build_call_expr (fndecl, 0);
40210 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
40211 }
40212 case IX86_BUILTIN_CPU_IS:
40213 case IX86_BUILTIN_CPU_SUPPORTS:
40214 {
40215 tree arg0 = CALL_EXPR_ARG (exp, 0);
40216 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
40217 gcc_assert (fold_expr != NULL_TREE);
40218 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
40219 }
40220 }
40221
40222 /* Determine whether the builtin function is available under the current ISA.
40223 Originally the builtin was not created if it wasn't applicable to the
40224 current ISA based on the command line switches. With function specific
40225 options, we need to check in the context of the function making the call
40226 whether it is supported. */
40227 if (ix86_builtins_isa[fcode].isa
40228 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
40229 {
40230 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
40231 NULL, (enum fpmath_unit) 0, false);
40232
40233 if (!opts)
40234 error ("%qE needs unknown isa option", fndecl);
40235 else
40236 {
40237 gcc_assert (opts != NULL);
40238 error ("%qE needs isa option %s", fndecl, opts);
40239 free (opts);
40240 }
40241 return const0_rtx;
40242 }
40243
40244 switch (fcode)
40245 {
40246 case IX86_BUILTIN_BNDMK:
40247 if (!target
40248 || GET_MODE (target) != BNDmode
40249 || !register_operand (target, BNDmode))
40250 target = gen_reg_rtx (BNDmode);
40251
40252 arg0 = CALL_EXPR_ARG (exp, 0);
40253 arg1 = CALL_EXPR_ARG (exp, 1);
40254
40255 op0 = expand_normal (arg0);
40256 op1 = expand_normal (arg1);
40257
40258 if (!register_operand (op0, Pmode))
40259 op0 = ix86_zero_extend_to_Pmode (op0);
40260 if (!register_operand (op1, Pmode))
40261 op1 = ix86_zero_extend_to_Pmode (op1);
40262
40263 /* Builtin arg1 is the size of the block, but the instruction's
40264 op1 should be (size - 1). */
40265 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
40266 NULL_RTX, 1, OPTAB_DIRECT);
40267
40268 emit_insn (BNDmode == BND64mode
40269 ? gen_bnd64_mk (target, op0, op1)
40270 : gen_bnd32_mk (target, op0, op1));
40271 return target;
40272
40273 case IX86_BUILTIN_BNDSTX:
40274 arg0 = CALL_EXPR_ARG (exp, 0);
40275 arg1 = CALL_EXPR_ARG (exp, 1);
40276 arg2 = CALL_EXPR_ARG (exp, 2);
40277
40278 op0 = expand_normal (arg0);
40279 op1 = expand_normal (arg1);
40280 op2 = expand_normal (arg2);
40281
40282 if (!register_operand (op0, Pmode))
40283 op0 = ix86_zero_extend_to_Pmode (op0);
40284 if (!register_operand (op1, BNDmode))
40285 op1 = copy_to_mode_reg (BNDmode, op1);
40286 if (!register_operand (op2, Pmode))
40287 op2 = ix86_zero_extend_to_Pmode (op2);
40288
40289 emit_insn (BNDmode == BND64mode
40290 ? gen_bnd64_stx (op2, op0, op1)
40291 : gen_bnd32_stx (op2, op0, op1));
40292 return 0;
40293
40294 case IX86_BUILTIN_BNDLDX:
40295 if (!target
40296 || GET_MODE (target) != BNDmode
40297 || !register_operand (target, BNDmode))
40298 target = gen_reg_rtx (BNDmode);
40299
40300 arg0 = CALL_EXPR_ARG (exp, 0);
40301 arg1 = CALL_EXPR_ARG (exp, 1);
40302
40303 op0 = expand_normal (arg0);
40304 op1 = expand_normal (arg1);
40305
40306 if (!register_operand (op0, Pmode))
40307 op0 = ix86_zero_extend_to_Pmode (op0);
40308 if (!register_operand (op1, Pmode))
40309 op1 = ix86_zero_extend_to_Pmode (op1);
40310
40311 emit_insn (BNDmode == BND64mode
40312 ? gen_bnd64_ldx (target, op0, op1)
40313 : gen_bnd32_ldx (target, op0, op1));
40314 return target;
40315
40316 case IX86_BUILTIN_BNDCL:
40317 arg0 = CALL_EXPR_ARG (exp, 0);
40318 arg1 = CALL_EXPR_ARG (exp, 1);
40319
40320 op0 = expand_normal (arg0);
40321 op1 = expand_normal (arg1);
40322
40323 if (!register_operand (op0, Pmode))
40324 op0 = ix86_zero_extend_to_Pmode (op0);
40325 if (!register_operand (op1, BNDmode))
40326 op1 = copy_to_mode_reg (BNDmode, op1);
40327
40328 emit_insn (BNDmode == BND64mode
40329 ? gen_bnd64_cl (op1, op0)
40330 : gen_bnd32_cl (op1, op0));
40331 return 0;
40332
40333 case IX86_BUILTIN_BNDCU:
40334 arg0 = CALL_EXPR_ARG (exp, 0);
40335 arg1 = CALL_EXPR_ARG (exp, 1);
40336
40337 op0 = expand_normal (arg0);
40338 op1 = expand_normal (arg1);
40339
40340 if (!register_operand (op0, Pmode))
40341 op0 = ix86_zero_extend_to_Pmode (op0);
40342 if (!register_operand (op1, BNDmode))
40343 op1 = copy_to_mode_reg (BNDmode, op1);
40344
40345 emit_insn (BNDmode == BND64mode
40346 ? gen_bnd64_cu (op1, op0)
40347 : gen_bnd32_cu (op1, op0));
40348 return 0;
40349
40350 case IX86_BUILTIN_BNDRET:
40351 arg0 = CALL_EXPR_ARG (exp, 0);
40352 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
40353 target = chkp_get_rtl_bounds (arg0);
40354
40355 /* If no bounds were specified for the returned value,
40356 then use INIT bounds. This usually happens when
40357 some built-in function is expanded. */
40358 if (!target)
40359 {
40360 rtx t1 = gen_reg_rtx (Pmode);
40361 rtx t2 = gen_reg_rtx (Pmode);
40362 target = gen_reg_rtx (BNDmode);
40363 emit_move_insn (t1, const0_rtx);
40364 emit_move_insn (t2, constm1_rtx);
40365 emit_insn (BNDmode == BND64mode
40366 ? gen_bnd64_mk (target, t1, t2)
40367 : gen_bnd32_mk (target, t1, t2));
40368 }
40369
40370 gcc_assert (target && REG_P (target));
40371 return target;
40372
40373 case IX86_BUILTIN_BNDNARROW:
40374 {
40375 rtx m1, m1h1, m1h2, lb, ub, t1;
40376
40377 /* Return value and lb. */
40378 arg0 = CALL_EXPR_ARG (exp, 0);
40379 /* Bounds. */
40380 arg1 = CALL_EXPR_ARG (exp, 1);
40381 /* Size. */
40382 arg2 = CALL_EXPR_ARG (exp, 2);
40383
40384 lb = expand_normal (arg0);
40385 op1 = expand_normal (arg1);
40386 op2 = expand_normal (arg2);
40387
40388 /* Size was passed but we need to use (size - 1) as for bndmk. */
40389 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
40390 NULL_RTX, 1, OPTAB_DIRECT);
40391
40392 /* Add LB to size and invert to get UB. */
40393 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
40394 op2, 1, OPTAB_DIRECT);
40395 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
40396
40397 if (!register_operand (lb, Pmode))
40398 lb = ix86_zero_extend_to_Pmode (lb);
40399 if (!register_operand (ub, Pmode))
40400 ub = ix86_zero_extend_to_Pmode (ub);
40401
40402 /* We need to move bounds to memory before any computations. */
40403 if (MEM_P (op1))
40404 m1 = op1;
40405 else
40406 {
40407 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
40408 emit_move_insn (m1, op1);
40409 }
40410
40411 /* Generate mem expression to be used for access to LB and UB. */
40412 m1h1 = adjust_address (m1, Pmode, 0);
40413 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
40414
40415 t1 = gen_reg_rtx (Pmode);
40416
40417 /* Compute LB. */
40418 emit_move_insn (t1, m1h1);
40419 ix86_emit_move_max (t1, lb);
40420 emit_move_insn (m1h1, t1);
40421
40422 /* Compute UB. UB is stored in 1's complement form. Therefore
40423 we also use max here. */
40424 emit_move_insn (t1, m1h2);
40425 ix86_emit_move_max (t1, ub);
40426 emit_move_insn (m1h2, t1);
40427
40428 op2 = gen_reg_rtx (BNDmode);
40429 emit_move_insn (op2, m1);
40430
40431 return chkp_join_splitted_slot (lb, op2);
40432 }
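/* Worked example for the narrowing above (values are illustrative
   assumptions, not from the original source): for lb == 0x1000 and
   size == 0x10 the requested upper bound is stored as ~(0x1000 + 0xf).
   Narrowing can only raise the lower bound and lower the upper bound, and
   since the upper bound is kept in one's-complement form both updates
   reduce to the unsigned max performed by ix86_emit_move_max.  */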
40433
40434 case IX86_BUILTIN_BNDINT:
40435 {
40436 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
40437
40438 if (!target
40439 || GET_MODE (target) != BNDmode
40440 || !register_operand (target, BNDmode))
40441 target = gen_reg_rtx (BNDmode);
40442
40443 arg0 = CALL_EXPR_ARG (exp, 0);
40444 arg1 = CALL_EXPR_ARG (exp, 1);
40445
40446 op0 = expand_normal (arg0);
40447 op1 = expand_normal (arg1);
40448
40449 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
40450 rh1 = adjust_address (res, Pmode, 0);
40451 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
40452
40453 /* Put the first bounds into temporaries. */
40454 lb1 = gen_reg_rtx (Pmode);
40455 ub1 = gen_reg_rtx (Pmode);
40456 if (MEM_P (op0))
40457 {
40458 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
40459 emit_move_insn (ub1, adjust_address (op0, Pmode,
40460 GET_MODE_SIZE (Pmode)));
40461 }
40462 else
40463 {
40464 emit_move_insn (res, op0);
40465 emit_move_insn (lb1, rh1);
40466 emit_move_insn (ub1, rh2);
40467 }
40468
40469 /* Put the second bounds into temporaries. */
40470 lb2 = gen_reg_rtx (Pmode);
40471 ub2 = gen_reg_rtx (Pmode);
40472 if (MEM_P (op1))
40473 {
40474 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
40475 emit_move_insn (ub2, adjust_address (op1, Pmode,
40476 GET_MODE_SIZE (Pmode)));
40477 }
40478 else
40479 {
40480 emit_move_insn (res, op1);
40481 emit_move_insn (lb2, rh1);
40482 emit_move_insn (ub2, rh2);
40483 }
40484
40485 /* Compute LB. */
40486 ix86_emit_move_max (lb1, lb2);
40487 emit_move_insn (rh1, lb1);
40488
40489 /* Compute UB. UB is stored in 1's complement form. Therefore
40490 we also use max here. */
40491 ix86_emit_move_max (ub1, ub2);
40492 emit_move_insn (rh2, ub1);
40493
40494 emit_move_insn (target, res);
40495
40496 return target;
40497 }
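/* Illustrative note (not from the original source): the intersection of
   [lb1, ub1] and [lb2, ub2] is [max (lb1, lb2), min (ub1, ub2)]; because
   the upper bounds are kept in one's-complement form, the min turns into
   the same unsigned max helper that is used for the lower bounds.  */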
40498
40499 case IX86_BUILTIN_SIZEOF:
40500 {
40501 tree name;
40502 rtx symbol;
40503
40504 if (!target
40505 || GET_MODE (target) != Pmode
40506 || !register_operand (target, Pmode))
40507 target = gen_reg_rtx (Pmode);
40508
40509 arg0 = CALL_EXPR_ARG (exp, 0);
40510 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
40511
40512 name = DECL_ASSEMBLER_NAME (arg0);
40513 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
40514
40515 emit_insn (Pmode == SImode
40516 ? gen_move_size_reloc_si (target, symbol)
40517 : gen_move_size_reloc_di (target, symbol));
40518
40519 return target;
40520 }
40521
40522 case IX86_BUILTIN_BNDLOWER:
40523 {
40524 rtx mem, hmem;
40525
40526 if (!target
40527 || GET_MODE (target) != Pmode
40528 || !register_operand (target, Pmode))
40529 target = gen_reg_rtx (Pmode);
40530
40531 arg0 = CALL_EXPR_ARG (exp, 0);
40532 op0 = expand_normal (arg0);
40533
40534 /* We need to move bounds to memory first. */
40535 if (MEM_P (op0))
40536 mem = op0;
40537 else
40538 {
40539 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40540 emit_move_insn (mem, op0);
40541 }
40542
40543 /* Generate mem expression to access LB and load it. */
40544 hmem = adjust_address (mem, Pmode, 0);
40545 emit_move_insn (target, hmem);
40546
40547 return target;
40548 }
40549
40550 case IX86_BUILTIN_BNDUPPER:
40551 {
40552 rtx mem, hmem, res;
40553
40554 if (!target
40555 || GET_MODE (target) != Pmode
40556 || !register_operand (target, Pmode))
40557 target = gen_reg_rtx (Pmode);
40558
40559 arg0 = CALL_EXPR_ARG (exp, 0);
40560 op0 = expand_normal (arg0);
40561
40562 /* We need to move bounds to memory first. */
40563 if (MEM_P (op0))
40564 mem = op0;
40565 else
40566 {
40567 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40568 emit_move_insn (mem, op0);
40569 }
40570
40571 /* Generate mem expression to access UB. */
40572 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
40573
40574 /* We need to invert all bits of UB. */
40575 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
40576
40577 if (res != target)
40578 emit_move_insn (target, res);
40579
40580 return target;
40581 }
40582
40583 case IX86_BUILTIN_MASKMOVQ:
40584 case IX86_BUILTIN_MASKMOVDQU:
40585 icode = (fcode == IX86_BUILTIN_MASKMOVQ
40586 ? CODE_FOR_mmx_maskmovq
40587 : CODE_FOR_sse2_maskmovdqu);
40588 /* Note the arg order is different from the operand order. */
40589 arg1 = CALL_EXPR_ARG (exp, 0);
40590 arg2 = CALL_EXPR_ARG (exp, 1);
40591 arg0 = CALL_EXPR_ARG (exp, 2);
40592 op0 = expand_normal (arg0);
40593 op1 = expand_normal (arg1);
40594 op2 = expand_normal (arg2);
40595 mode0 = insn_data[icode].operand[0].mode;
40596 mode1 = insn_data[icode].operand[1].mode;
40597 mode2 = insn_data[icode].operand[2].mode;
40598
40599 op0 = ix86_zero_extend_to_Pmode (op0);
40600 op0 = gen_rtx_MEM (mode1, op0);
40601
40602 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40603 op0 = copy_to_mode_reg (mode0, op0);
40604 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40605 op1 = copy_to_mode_reg (mode1, op1);
40606 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40607 op2 = copy_to_mode_reg (mode2, op2);
40608 pat = GEN_FCN (icode) (op0, op1, op2);
40609 if (! pat)
40610 return 0;
40611 emit_insn (pat);
40612 return 0;
40613
40614 case IX86_BUILTIN_LDMXCSR:
40615 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
40616 target = assign_386_stack_local (SImode, SLOT_TEMP);
40617 emit_move_insn (target, op0);
40618 emit_insn (gen_sse_ldmxcsr (target));
40619 return 0;
40620
40621 case IX86_BUILTIN_STMXCSR:
40622 target = assign_386_stack_local (SImode, SLOT_TEMP);
40623 emit_insn (gen_sse_stmxcsr (target));
40624 return copy_to_mode_reg (SImode, target);
40625
40626 case IX86_BUILTIN_CLFLUSH:
40627 arg0 = CALL_EXPR_ARG (exp, 0);
40628 op0 = expand_normal (arg0);
40629 icode = CODE_FOR_sse2_clflush;
40630 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40631 op0 = ix86_zero_extend_to_Pmode (op0);
40632
40633 emit_insn (gen_sse2_clflush (op0));
40634 return 0;
40635
40636 case IX86_BUILTIN_CLWB:
40637 arg0 = CALL_EXPR_ARG (exp, 0);
40638 op0 = expand_normal (arg0);
40639 icode = CODE_FOR_clwb;
40640 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40641 op0 = ix86_zero_extend_to_Pmode (op0);
40642
40643 emit_insn (gen_clwb (op0));
40644 return 0;
40645
40646 case IX86_BUILTIN_CLFLUSHOPT:
40647 arg0 = CALL_EXPR_ARG (exp, 0);
40648 op0 = expand_normal (arg0);
40649 icode = CODE_FOR_clflushopt;
40650 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40651 op0 = ix86_zero_extend_to_Pmode (op0);
40652
40653 emit_insn (gen_clflushopt (op0));
40654 return 0;
40655
40656 case IX86_BUILTIN_MONITOR:
40657 case IX86_BUILTIN_MONITORX:
40658 arg0 = CALL_EXPR_ARG (exp, 0);
40659 arg1 = CALL_EXPR_ARG (exp, 1);
40660 arg2 = CALL_EXPR_ARG (exp, 2);
40661 op0 = expand_normal (arg0);
40662 op1 = expand_normal (arg1);
40663 op2 = expand_normal (arg2);
40664 if (!REG_P (op0))
40665 op0 = ix86_zero_extend_to_Pmode (op0);
40666 if (!REG_P (op1))
40667 op1 = copy_to_mode_reg (SImode, op1);
40668 if (!REG_P (op2))
40669 op2 = copy_to_mode_reg (SImode, op2);
40670
40671 emit_insn (fcode == IX86_BUILTIN_MONITOR
40672 ? ix86_gen_monitor (op0, op1, op2)
40673 : ix86_gen_monitorx (op0, op1, op2));
40674 return 0;
40675
40676 case IX86_BUILTIN_MWAIT:
40677 arg0 = CALL_EXPR_ARG (exp, 0);
40678 arg1 = CALL_EXPR_ARG (exp, 1);
40679 op0 = expand_normal (arg0);
40680 op1 = expand_normal (arg1);
40681 if (!REG_P (op0))
40682 op0 = copy_to_mode_reg (SImode, op0);
40683 if (!REG_P (op1))
40684 op1 = copy_to_mode_reg (SImode, op1);
40685 emit_insn (gen_sse3_mwait (op0, op1));
40686 return 0;
40687
40688 case IX86_BUILTIN_MWAITX:
40689 arg0 = CALL_EXPR_ARG (exp, 0);
40690 arg1 = CALL_EXPR_ARG (exp, 1);
40691 arg2 = CALL_EXPR_ARG (exp, 2);
40692 op0 = expand_normal (arg0);
40693 op1 = expand_normal (arg1);
40694 op2 = expand_normal (arg2);
40695 if (!REG_P (op0))
40696 op0 = copy_to_mode_reg (SImode, op0);
40697 if (!REG_P (op1))
40698 op1 = copy_to_mode_reg (SImode, op1);
40699 if (!REG_P (op2))
40700 op2 = copy_to_mode_reg (SImode, op2);
40701 emit_insn (gen_mwaitx (op0, op1, op2));
40702 return 0;
40703
40704 case IX86_BUILTIN_CLZERO:
40705 arg0 = CALL_EXPR_ARG (exp, 0);
40706 op0 = expand_normal (arg0);
40707 if (!REG_P (op0))
40708 op0 = ix86_zero_extend_to_Pmode (op0);
40709 emit_insn (ix86_gen_clzero (op0));
40710 return 0;
40711
40712 case IX86_BUILTIN_VEC_INIT_V2SI:
40713 case IX86_BUILTIN_VEC_INIT_V4HI:
40714 case IX86_BUILTIN_VEC_INIT_V8QI:
40715 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
40716
40717 case IX86_BUILTIN_VEC_EXT_V2DF:
40718 case IX86_BUILTIN_VEC_EXT_V2DI:
40719 case IX86_BUILTIN_VEC_EXT_V4SF:
40720 case IX86_BUILTIN_VEC_EXT_V4SI:
40721 case IX86_BUILTIN_VEC_EXT_V8HI:
40722 case IX86_BUILTIN_VEC_EXT_V2SI:
40723 case IX86_BUILTIN_VEC_EXT_V4HI:
40724 case IX86_BUILTIN_VEC_EXT_V16QI:
40725 return ix86_expand_vec_ext_builtin (exp, target);
40726
40727 case IX86_BUILTIN_VEC_SET_V2DI:
40728 case IX86_BUILTIN_VEC_SET_V4SF:
40729 case IX86_BUILTIN_VEC_SET_V4SI:
40730 case IX86_BUILTIN_VEC_SET_V8HI:
40731 case IX86_BUILTIN_VEC_SET_V4HI:
40732 case IX86_BUILTIN_VEC_SET_V16QI:
40733 return ix86_expand_vec_set_builtin (exp);
40734
40735 case IX86_BUILTIN_INFQ:
40736 case IX86_BUILTIN_HUGE_VALQ:
40737 {
40738 REAL_VALUE_TYPE inf;
40739 rtx tmp;
40740
40741 real_inf (&inf);
40742 tmp = const_double_from_real_value (inf, mode);
40743
40744 tmp = validize_mem (force_const_mem (mode, tmp));
40745
40746 if (target == 0)
40747 target = gen_reg_rtx (mode);
40748
40749 emit_move_insn (target, tmp);
40750 return target;
40751 }
40752
40753 case IX86_BUILTIN_RDPMC:
40754 case IX86_BUILTIN_RDTSC:
40755 case IX86_BUILTIN_RDTSCP:
40756
40757 op0 = gen_reg_rtx (DImode);
40758 op1 = gen_reg_rtx (DImode);
40759
40760 if (fcode == IX86_BUILTIN_RDPMC)
40761 {
40762 arg0 = CALL_EXPR_ARG (exp, 0);
40763 op2 = expand_normal (arg0);
40764 if (!register_operand (op2, SImode))
40765 op2 = copy_to_mode_reg (SImode, op2);
40766
40767 insn = (TARGET_64BIT
40768 ? gen_rdpmc_rex64 (op0, op1, op2)
40769 : gen_rdpmc (op0, op2));
40770 emit_insn (insn);
40771 }
40772 else if (fcode == IX86_BUILTIN_RDTSC)
40773 {
40774 insn = (TARGET_64BIT
40775 ? gen_rdtsc_rex64 (op0, op1)
40776 : gen_rdtsc (op0));
40777 emit_insn (insn);
40778 }
40779 else
40780 {
40781 op2 = gen_reg_rtx (SImode);
40782
40783 insn = (TARGET_64BIT
40784 ? gen_rdtscp_rex64 (op0, op1, op2)
40785 : gen_rdtscp (op0, op2));
40786 emit_insn (insn);
40787
40788 arg0 = CALL_EXPR_ARG (exp, 0);
40789 op4 = expand_normal (arg0);
40790 if (!address_operand (op4, VOIDmode))
40791 {
40792 op4 = convert_memory_address (Pmode, op4);
40793 op4 = copy_addr_to_reg (op4);
40794 }
40795 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
40796 }
40797
40798 if (target == 0)
40799 {
40800 /* mode is VOIDmode if __builtin_rd* has been called
40801 without an lhs. */
40802 if (mode == VOIDmode)
40803 return target;
40804 target = gen_reg_rtx (mode);
40805 }
40806
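/* Illustrative note (not from the original source): on 64-bit targets the
   rdtsc/rdpmc/rdtscp patterns above return the counter in two DImode
   halves, the low 32 bits in op0 and the high 32 bits in op1, so the block
   below assembles the full value as op0 |= op1 << 32 before copying it
   into TARGET.  */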
40807 if (TARGET_64BIT)
40808 {
40809 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
40810 op1, 1, OPTAB_DIRECT);
40811 op0 = expand_simple_binop (DImode, IOR, op0, op1,
40812 op0, 1, OPTAB_DIRECT);
40813 }
40814
40815 emit_move_insn (target, op0);
40816 return target;
40817
40818 case IX86_BUILTIN_FXSAVE:
40819 case IX86_BUILTIN_FXRSTOR:
40820 case IX86_BUILTIN_FXSAVE64:
40821 case IX86_BUILTIN_FXRSTOR64:
40822 case IX86_BUILTIN_FNSTENV:
40823 case IX86_BUILTIN_FLDENV:
40824 mode0 = BLKmode;
40825 switch (fcode)
40826 {
40827 case IX86_BUILTIN_FXSAVE:
40828 icode = CODE_FOR_fxsave;
40829 break;
40830 case IX86_BUILTIN_FXRSTOR:
40831 icode = CODE_FOR_fxrstor;
40832 break;
40833 case IX86_BUILTIN_FXSAVE64:
40834 icode = CODE_FOR_fxsave64;
40835 break;
40836 case IX86_BUILTIN_FXRSTOR64:
40837 icode = CODE_FOR_fxrstor64;
40838 break;
40839 case IX86_BUILTIN_FNSTENV:
40840 icode = CODE_FOR_fnstenv;
40841 break;
40842 case IX86_BUILTIN_FLDENV:
40843 icode = CODE_FOR_fldenv;
40844 break;
40845 default:
40846 gcc_unreachable ();
40847 }
40848
40849 arg0 = CALL_EXPR_ARG (exp, 0);
40850 op0 = expand_normal (arg0);
40851
40852 if (!address_operand (op0, VOIDmode))
40853 {
40854 op0 = convert_memory_address (Pmode, op0);
40855 op0 = copy_addr_to_reg (op0);
40856 }
40857 op0 = gen_rtx_MEM (mode0, op0);
40858
40859 pat = GEN_FCN (icode) (op0);
40860 if (pat)
40861 emit_insn (pat);
40862 return 0;
40863
40864 case IX86_BUILTIN_XSAVE:
40865 case IX86_BUILTIN_XRSTOR:
40866 case IX86_BUILTIN_XSAVE64:
40867 case IX86_BUILTIN_XRSTOR64:
40868 case IX86_BUILTIN_XSAVEOPT:
40869 case IX86_BUILTIN_XSAVEOPT64:
40870 case IX86_BUILTIN_XSAVES:
40871 case IX86_BUILTIN_XRSTORS:
40872 case IX86_BUILTIN_XSAVES64:
40873 case IX86_BUILTIN_XRSTORS64:
40874 case IX86_BUILTIN_XSAVEC:
40875 case IX86_BUILTIN_XSAVEC64:
40876 arg0 = CALL_EXPR_ARG (exp, 0);
40877 arg1 = CALL_EXPR_ARG (exp, 1);
40878 op0 = expand_normal (arg0);
40879 op1 = expand_normal (arg1);
40880
40881 if (!address_operand (op0, VOIDmode))
40882 {
40883 op0 = convert_memory_address (Pmode, op0);
40884 op0 = copy_addr_to_reg (op0);
40885 }
40886 op0 = gen_rtx_MEM (BLKmode, op0);
40887
40888 op1 = force_reg (DImode, op1);
40889
40890 if (TARGET_64BIT)
40891 {
40892 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
40893 NULL, 1, OPTAB_DIRECT);
40894 switch (fcode)
40895 {
40896 case IX86_BUILTIN_XSAVE:
40897 icode = CODE_FOR_xsave_rex64;
40898 break;
40899 case IX86_BUILTIN_XRSTOR:
40900 icode = CODE_FOR_xrstor_rex64;
40901 break;
40902 case IX86_BUILTIN_XSAVE64:
40903 icode = CODE_FOR_xsave64;
40904 break;
40905 case IX86_BUILTIN_XRSTOR64:
40906 icode = CODE_FOR_xrstor64;
40907 break;
40908 case IX86_BUILTIN_XSAVEOPT:
40909 icode = CODE_FOR_xsaveopt_rex64;
40910 break;
40911 case IX86_BUILTIN_XSAVEOPT64:
40912 icode = CODE_FOR_xsaveopt64;
40913 break;
40914 case IX86_BUILTIN_XSAVES:
40915 icode = CODE_FOR_xsaves_rex64;
40916 break;
40917 case IX86_BUILTIN_XRSTORS:
40918 icode = CODE_FOR_xrstors_rex64;
40919 break;
40920 case IX86_BUILTIN_XSAVES64:
40921 icode = CODE_FOR_xsaves64;
40922 break;
40923 case IX86_BUILTIN_XRSTORS64:
40924 icode = CODE_FOR_xrstors64;
40925 break;
40926 case IX86_BUILTIN_XSAVEC:
40927 icode = CODE_FOR_xsavec_rex64;
40928 break;
40929 case IX86_BUILTIN_XSAVEC64:
40930 icode = CODE_FOR_xsavec64;
40931 break;
40932 default:
40933 gcc_unreachable ();
40934 }
40935
40936 op2 = gen_lowpart (SImode, op2);
40937 op1 = gen_lowpart (SImode, op1);
40938 pat = GEN_FCN (icode) (op0, op1, op2);
40939 }
40940 else
40941 {
40942 switch (fcode)
40943 {
40944 case IX86_BUILTIN_XSAVE:
40945 icode = CODE_FOR_xsave;
40946 break;
40947 case IX86_BUILTIN_XRSTOR:
40948 icode = CODE_FOR_xrstor;
40949 break;
40950 case IX86_BUILTIN_XSAVEOPT:
40951 icode = CODE_FOR_xsaveopt;
40952 break;
40953 case IX86_BUILTIN_XSAVES:
40954 icode = CODE_FOR_xsaves;
40955 break;
40956 case IX86_BUILTIN_XRSTORS:
40957 icode = CODE_FOR_xrstors;
40958 break;
40959 case IX86_BUILTIN_XSAVEC:
40960 icode = CODE_FOR_xsavec;
40961 break;
40962 default:
40963 gcc_unreachable ();
40964 }
40965 pat = GEN_FCN (icode) (op0, op1);
40966 }
40967
40968 if (pat)
40969 emit_insn (pat);
40970 return 0;
40971
40972 case IX86_BUILTIN_LLWPCB:
40973 arg0 = CALL_EXPR_ARG (exp, 0);
40974 op0 = expand_normal (arg0);
40975 icode = CODE_FOR_lwp_llwpcb;
40976 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40977 op0 = ix86_zero_extend_to_Pmode (op0);
40978 emit_insn (gen_lwp_llwpcb (op0));
40979 return 0;
40980
40981 case IX86_BUILTIN_SLWPCB:
40982 icode = CODE_FOR_lwp_slwpcb;
40983 if (!target
40984 || !insn_data[icode].operand[0].predicate (target, Pmode))
40985 target = gen_reg_rtx (Pmode);
40986 emit_insn (gen_lwp_slwpcb (target));
40987 return target;
40988
40989 case IX86_BUILTIN_BEXTRI32:
40990 case IX86_BUILTIN_BEXTRI64:
40991 arg0 = CALL_EXPR_ARG (exp, 0);
40992 arg1 = CALL_EXPR_ARG (exp, 1);
40993 op0 = expand_normal (arg0);
40994 op1 = expand_normal (arg1);
40995 icode = (fcode == IX86_BUILTIN_BEXTRI32
40996 ? CODE_FOR_tbm_bextri_si
40997 : CODE_FOR_tbm_bextri_di);
40998 if (!CONST_INT_P (op1))
40999 {
41000 error ("last argument must be an immediate");
41001 return const0_rtx;
41002 }
41003 else
41004 {
41005 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
41006 unsigned char lsb_index = INTVAL (op1) & 0xFF;
41007 op1 = GEN_INT (length);
41008 op2 = GEN_INT (lsb_index);
41009 pat = GEN_FCN (icode) (target, op0, op1, op2);
41010 if (pat)
41011 emit_insn (pat);
41012 return target;
41013 }
41014
41015 case IX86_BUILTIN_RDRAND16_STEP:
41016 icode = CODE_FOR_rdrandhi_1;
41017 mode0 = HImode;
41018 goto rdrand_step;
41019
41020 case IX86_BUILTIN_RDRAND32_STEP:
41021 icode = CODE_FOR_rdrandsi_1;
41022 mode0 = SImode;
41023 goto rdrand_step;
41024
41025 case IX86_BUILTIN_RDRAND64_STEP:
41026 icode = CODE_FOR_rdranddi_1;
41027 mode0 = DImode;
41028
41029 rdrand_step:
41030 op0 = gen_reg_rtx (mode0);
41031 emit_insn (GEN_FCN (icode) (op0));
41032
41033 arg0 = CALL_EXPR_ARG (exp, 0);
41034 op1 = expand_normal (arg0);
41035 if (!address_operand (op1, VOIDmode))
41036 {
41037 op1 = convert_memory_address (Pmode, op1);
41038 op1 = copy_addr_to_reg (op1);
41039 }
41040 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41041
41042 op1 = gen_reg_rtx (SImode);
41043 emit_move_insn (op1, CONST1_RTX (SImode));
41044
41045 /* Emit SImode conditional move. */
41046 if (mode0 == HImode)
41047 {
41048 op2 = gen_reg_rtx (SImode);
41049 emit_insn (gen_zero_extendhisi2 (op2, op0));
41050 }
41051 else if (mode0 == SImode)
41052 op2 = op0;
41053 else
41054 op2 = gen_rtx_SUBREG (SImode, op0, 0);
41055
41056 if (target == 0
41057 || !register_operand (target, SImode))
41058 target = gen_reg_rtx (SImode);
41059
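/* Illustrative note (hardware behaviour is an assumption taken from the
   ISA documentation): RDRAND sets CF on success and clears the destination
   on failure, so the conditional move below yields the constant 1 (op1)
   when CF is set and the zero-extended hardware result (op2, i.e. 0)
   otherwise, which is exactly the step builtin's success indication.  */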
41060 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
41061 const0_rtx);
41062 emit_insn (gen_rtx_SET (target,
41063 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
41064 return target;
41065
41066 case IX86_BUILTIN_RDSEED16_STEP:
41067 icode = CODE_FOR_rdseedhi_1;
41068 mode0 = HImode;
41069 goto rdseed_step;
41070
41071 case IX86_BUILTIN_RDSEED32_STEP:
41072 icode = CODE_FOR_rdseedsi_1;
41073 mode0 = SImode;
41074 goto rdseed_step;
41075
41076 case IX86_BUILTIN_RDSEED64_STEP:
41077 icode = CODE_FOR_rdseeddi_1;
41078 mode0 = DImode;
41079
41080 rdseed_step:
41081 op0 = gen_reg_rtx (mode0);
41082 emit_insn (GEN_FCN (icode) (op0));
41083
41084 arg0 = CALL_EXPR_ARG (exp, 0);
41085 op1 = expand_normal (arg0);
41086 if (!address_operand (op1, VOIDmode))
41087 {
41088 op1 = convert_memory_address (Pmode, op1);
41089 op1 = copy_addr_to_reg (op1);
41090 }
41091 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41092
41093 op2 = gen_reg_rtx (QImode);
41094
41095 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
41096 const0_rtx);
41097 emit_insn (gen_rtx_SET (op2, pat));
41098
41099 if (target == 0
41100 || !register_operand (target, SImode))
41101 target = gen_reg_rtx (SImode);
41102
41103 emit_insn (gen_zero_extendqisi2 (target, op2));
41104 return target;
41105
41106 case IX86_BUILTIN_SBB32:
41107 icode = CODE_FOR_subborrowsi;
41108 mode0 = SImode;
41109 goto handlecarry;
41110
41111 case IX86_BUILTIN_SBB64:
41112 icode = CODE_FOR_subborrowdi;
41113 mode0 = DImode;
41114 goto handlecarry;
41115
41116 case IX86_BUILTIN_ADDCARRYX32:
41117 icode = CODE_FOR_addcarrysi;
41118 mode0 = SImode;
41119 goto handlecarry;
41120
41121 case IX86_BUILTIN_ADDCARRYX64:
41122 icode = CODE_FOR_addcarrydi;
41123 mode0 = DImode;
41124
41125 handlecarry:
41126 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
41127 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
41128 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
41129 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
41130
41131 op1 = expand_normal (arg0);
41132 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
41133
41134 op2 = expand_normal (arg1);
41135 if (!register_operand (op2, mode0))
41136 op2 = copy_to_mode_reg (mode0, op2);
41137
41138 op3 = expand_normal (arg2);
41139 if (!register_operand (op3, mode0))
41140 op3 = copy_to_mode_reg (mode0, op3);
41141
41142 op4 = expand_normal (arg3);
41143 if (!address_operand (op4, VOIDmode))
41144 {
41145 op4 = convert_memory_address (Pmode, op4);
41146 op4 = copy_addr_to_reg (op4);
41147 }
41148
41149 /* Generate CF from input operand. */
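/* Added note (not from the original source): op1 here is the zero-extended
   carry-in; adding the QImode constant -1 (0xff) to it sets the hardware
   carry flag exactly when the carry-in was non-zero, which is the CF value
   consumed by the add/sub-with-carry pattern emitted below.  */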
41150 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
41151
41152 /* Generate instruction that consumes CF. */
41153 op0 = gen_reg_rtx (mode0);
41154
41155 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
41156 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
41157 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
41158
41159 /* Return current CF value. */
41160 if (target == 0)
41161 target = gen_reg_rtx (QImode);
41162
41163 PUT_MODE (pat, QImode);
41164 emit_insn (gen_rtx_SET (target, pat));
41165
41166 /* Store the result. */
41167 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
41168
41169 return target;
41170
41171 case IX86_BUILTIN_READ_FLAGS:
41172 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
41173
41174 if (optimize
41175 || target == NULL_RTX
41176 || !nonimmediate_operand (target, word_mode)
41177 || GET_MODE (target) != word_mode)
41178 target = gen_reg_rtx (word_mode);
41179
41180 emit_insn (gen_pop (target));
41181 return target;
41182
41183 case IX86_BUILTIN_WRITE_FLAGS:
41184
41185 arg0 = CALL_EXPR_ARG (exp, 0);
41186 op0 = expand_normal (arg0);
41187 if (!general_no_elim_operand (op0, word_mode))
41188 op0 = copy_to_mode_reg (word_mode, op0);
41189
41190 emit_insn (gen_push (op0));
41191 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
41192 return 0;
41193
41194 case IX86_BUILTIN_KORTESTC16:
41195 icode = CODE_FOR_kortestchi;
41196 mode0 = HImode;
41197 mode1 = CCCmode;
41198 goto kortest;
41199
41200 case IX86_BUILTIN_KORTESTZ16:
41201 icode = CODE_FOR_kortestzhi;
41202 mode0 = HImode;
41203 mode1 = CCZmode;
41204
41205 kortest:
41206 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
41207 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
41208 op0 = expand_normal (arg0);
41209 op1 = expand_normal (arg1);
41210
41211 op0 = copy_to_reg (op0);
41212 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41213 op1 = copy_to_reg (op1);
41214 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
41215
41216 target = gen_reg_rtx (QImode);
41217 emit_insn (gen_rtx_SET (target, const0_rtx));
41218
41219 /* Emit kortest. */
41220 emit_insn (GEN_FCN (icode) (op0, op1));
41221 /* And use setcc to return result from flags. */
41222 ix86_expand_setcc (target, EQ,
41223 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
41224 return target;
41225
41226 case IX86_BUILTIN_GATHERSIV2DF:
41227 icode = CODE_FOR_avx2_gathersiv2df;
41228 goto gather_gen;
41229 case IX86_BUILTIN_GATHERSIV4DF:
41230 icode = CODE_FOR_avx2_gathersiv4df;
41231 goto gather_gen;
41232 case IX86_BUILTIN_GATHERDIV2DF:
41233 icode = CODE_FOR_avx2_gatherdiv2df;
41234 goto gather_gen;
41235 case IX86_BUILTIN_GATHERDIV4DF:
41236 icode = CODE_FOR_avx2_gatherdiv4df;
41237 goto gather_gen;
41238 case IX86_BUILTIN_GATHERSIV4SF:
41239 icode = CODE_FOR_avx2_gathersiv4sf;
41240 goto gather_gen;
41241 case IX86_BUILTIN_GATHERSIV8SF:
41242 icode = CODE_FOR_avx2_gathersiv8sf;
41243 goto gather_gen;
41244 case IX86_BUILTIN_GATHERDIV4SF:
41245 icode = CODE_FOR_avx2_gatherdiv4sf;
41246 goto gather_gen;
41247 case IX86_BUILTIN_GATHERDIV8SF:
41248 icode = CODE_FOR_avx2_gatherdiv8sf;
41249 goto gather_gen;
41250 case IX86_BUILTIN_GATHERSIV2DI:
41251 icode = CODE_FOR_avx2_gathersiv2di;
41252 goto gather_gen;
41253 case IX86_BUILTIN_GATHERSIV4DI:
41254 icode = CODE_FOR_avx2_gathersiv4di;
41255 goto gather_gen;
41256 case IX86_BUILTIN_GATHERDIV2DI:
41257 icode = CODE_FOR_avx2_gatherdiv2di;
41258 goto gather_gen;
41259 case IX86_BUILTIN_GATHERDIV4DI:
41260 icode = CODE_FOR_avx2_gatherdiv4di;
41261 goto gather_gen;
41262 case IX86_BUILTIN_GATHERSIV4SI:
41263 icode = CODE_FOR_avx2_gathersiv4si;
41264 goto gather_gen;
41265 case IX86_BUILTIN_GATHERSIV8SI:
41266 icode = CODE_FOR_avx2_gathersiv8si;
41267 goto gather_gen;
41268 case IX86_BUILTIN_GATHERDIV4SI:
41269 icode = CODE_FOR_avx2_gatherdiv4si;
41270 goto gather_gen;
41271 case IX86_BUILTIN_GATHERDIV8SI:
41272 icode = CODE_FOR_avx2_gatherdiv8si;
41273 goto gather_gen;
41274 case IX86_BUILTIN_GATHERALTSIV4DF:
41275 icode = CODE_FOR_avx2_gathersiv4df;
41276 goto gather_gen;
41277 case IX86_BUILTIN_GATHERALTDIV8SF:
41278 icode = CODE_FOR_avx2_gatherdiv8sf;
41279 goto gather_gen;
41280 case IX86_BUILTIN_GATHERALTSIV4DI:
41281 icode = CODE_FOR_avx2_gathersiv4di;
41282 goto gather_gen;
41283 case IX86_BUILTIN_GATHERALTDIV8SI:
41284 icode = CODE_FOR_avx2_gatherdiv8si;
41285 goto gather_gen;
41286 case IX86_BUILTIN_GATHER3SIV16SF:
41287 icode = CODE_FOR_avx512f_gathersiv16sf;
41288 goto gather_gen;
41289 case IX86_BUILTIN_GATHER3SIV8DF:
41290 icode = CODE_FOR_avx512f_gathersiv8df;
41291 goto gather_gen;
41292 case IX86_BUILTIN_GATHER3DIV16SF:
41293 icode = CODE_FOR_avx512f_gatherdiv16sf;
41294 goto gather_gen;
41295 case IX86_BUILTIN_GATHER3DIV8DF:
41296 icode = CODE_FOR_avx512f_gatherdiv8df;
41297 goto gather_gen;
41298 case IX86_BUILTIN_GATHER3SIV16SI:
41299 icode = CODE_FOR_avx512f_gathersiv16si;
41300 goto gather_gen;
41301 case IX86_BUILTIN_GATHER3SIV8DI:
41302 icode = CODE_FOR_avx512f_gathersiv8di;
41303 goto gather_gen;
41304 case IX86_BUILTIN_GATHER3DIV16SI:
41305 icode = CODE_FOR_avx512f_gatherdiv16si;
41306 goto gather_gen;
41307 case IX86_BUILTIN_GATHER3DIV8DI:
41308 icode = CODE_FOR_avx512f_gatherdiv8di;
41309 goto gather_gen;
41310 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41311 icode = CODE_FOR_avx512f_gathersiv8df;
41312 goto gather_gen;
41313 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41314 icode = CODE_FOR_avx512f_gatherdiv16sf;
41315 goto gather_gen;
41316 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41317 icode = CODE_FOR_avx512f_gathersiv8di;
41318 goto gather_gen;
41319 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41320 icode = CODE_FOR_avx512f_gatherdiv16si;
41321 goto gather_gen;
41322 case IX86_BUILTIN_GATHER3SIV2DF:
41323 icode = CODE_FOR_avx512vl_gathersiv2df;
41324 goto gather_gen;
41325 case IX86_BUILTIN_GATHER3SIV4DF:
41326 icode = CODE_FOR_avx512vl_gathersiv4df;
41327 goto gather_gen;
41328 case IX86_BUILTIN_GATHER3DIV2DF:
41329 icode = CODE_FOR_avx512vl_gatherdiv2df;
41330 goto gather_gen;
41331 case IX86_BUILTIN_GATHER3DIV4DF:
41332 icode = CODE_FOR_avx512vl_gatherdiv4df;
41333 goto gather_gen;
41334 case IX86_BUILTIN_GATHER3SIV4SF:
41335 icode = CODE_FOR_avx512vl_gathersiv4sf;
41336 goto gather_gen;
41337 case IX86_BUILTIN_GATHER3SIV8SF:
41338 icode = CODE_FOR_avx512vl_gathersiv8sf;
41339 goto gather_gen;
41340 case IX86_BUILTIN_GATHER3DIV4SF:
41341 icode = CODE_FOR_avx512vl_gatherdiv4sf;
41342 goto gather_gen;
41343 case IX86_BUILTIN_GATHER3DIV8SF:
41344 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41345 goto gather_gen;
41346 case IX86_BUILTIN_GATHER3SIV2DI:
41347 icode = CODE_FOR_avx512vl_gathersiv2di;
41348 goto gather_gen;
41349 case IX86_BUILTIN_GATHER3SIV4DI:
41350 icode = CODE_FOR_avx512vl_gathersiv4di;
41351 goto gather_gen;
41352 case IX86_BUILTIN_GATHER3DIV2DI:
41353 icode = CODE_FOR_avx512vl_gatherdiv2di;
41354 goto gather_gen;
41355 case IX86_BUILTIN_GATHER3DIV4DI:
41356 icode = CODE_FOR_avx512vl_gatherdiv4di;
41357 goto gather_gen;
41358 case IX86_BUILTIN_GATHER3SIV4SI:
41359 icode = CODE_FOR_avx512vl_gathersiv4si;
41360 goto gather_gen;
41361 case IX86_BUILTIN_GATHER3SIV8SI:
41362 icode = CODE_FOR_avx512vl_gathersiv8si;
41363 goto gather_gen;
41364 case IX86_BUILTIN_GATHER3DIV4SI:
41365 icode = CODE_FOR_avx512vl_gatherdiv4si;
41366 goto gather_gen;
41367 case IX86_BUILTIN_GATHER3DIV8SI:
41368 icode = CODE_FOR_avx512vl_gatherdiv8si;
41369 goto gather_gen;
41370 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41371 icode = CODE_FOR_avx512vl_gathersiv4df;
41372 goto gather_gen;
41373 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41374 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41375 goto gather_gen;
41376 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41377 icode = CODE_FOR_avx512vl_gathersiv4di;
41378 goto gather_gen;
41379 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41380 icode = CODE_FOR_avx512vl_gatherdiv8si;
41381 goto gather_gen;
41382 case IX86_BUILTIN_SCATTERSIV16SF:
41383 icode = CODE_FOR_avx512f_scattersiv16sf;
41384 goto scatter_gen;
41385 case IX86_BUILTIN_SCATTERSIV8DF:
41386 icode = CODE_FOR_avx512f_scattersiv8df;
41387 goto scatter_gen;
41388 case IX86_BUILTIN_SCATTERDIV16SF:
41389 icode = CODE_FOR_avx512f_scatterdiv16sf;
41390 goto scatter_gen;
41391 case IX86_BUILTIN_SCATTERDIV8DF:
41392 icode = CODE_FOR_avx512f_scatterdiv8df;
41393 goto scatter_gen;
41394 case IX86_BUILTIN_SCATTERSIV16SI:
41395 icode = CODE_FOR_avx512f_scattersiv16si;
41396 goto scatter_gen;
41397 case IX86_BUILTIN_SCATTERSIV8DI:
41398 icode = CODE_FOR_avx512f_scattersiv8di;
41399 goto scatter_gen;
41400 case IX86_BUILTIN_SCATTERDIV16SI:
41401 icode = CODE_FOR_avx512f_scatterdiv16si;
41402 goto scatter_gen;
41403 case IX86_BUILTIN_SCATTERDIV8DI:
41404 icode = CODE_FOR_avx512f_scatterdiv8di;
41405 goto scatter_gen;
41406 case IX86_BUILTIN_SCATTERSIV8SF:
41407 icode = CODE_FOR_avx512vl_scattersiv8sf;
41408 goto scatter_gen;
41409 case IX86_BUILTIN_SCATTERSIV4SF:
41410 icode = CODE_FOR_avx512vl_scattersiv4sf;
41411 goto scatter_gen;
41412 case IX86_BUILTIN_SCATTERSIV4DF:
41413 icode = CODE_FOR_avx512vl_scattersiv4df;
41414 goto scatter_gen;
41415 case IX86_BUILTIN_SCATTERSIV2DF:
41416 icode = CODE_FOR_avx512vl_scattersiv2df;
41417 goto scatter_gen;
41418 case IX86_BUILTIN_SCATTERDIV8SF:
41419 icode = CODE_FOR_avx512vl_scatterdiv8sf;
41420 goto scatter_gen;
41421 case IX86_BUILTIN_SCATTERDIV4SF:
41422 icode = CODE_FOR_avx512vl_scatterdiv4sf;
41423 goto scatter_gen;
41424 case IX86_BUILTIN_SCATTERDIV4DF:
41425 icode = CODE_FOR_avx512vl_scatterdiv4df;
41426 goto scatter_gen;
41427 case IX86_BUILTIN_SCATTERDIV2DF:
41428 icode = CODE_FOR_avx512vl_scatterdiv2df;
41429 goto scatter_gen;
41430 case IX86_BUILTIN_SCATTERSIV8SI:
41431 icode = CODE_FOR_avx512vl_scattersiv8si;
41432 goto scatter_gen;
41433 case IX86_BUILTIN_SCATTERSIV4SI:
41434 icode = CODE_FOR_avx512vl_scattersiv4si;
41435 goto scatter_gen;
41436 case IX86_BUILTIN_SCATTERSIV4DI:
41437 icode = CODE_FOR_avx512vl_scattersiv4di;
41438 goto scatter_gen;
41439 case IX86_BUILTIN_SCATTERSIV2DI:
41440 icode = CODE_FOR_avx512vl_scattersiv2di;
41441 goto scatter_gen;
41442 case IX86_BUILTIN_SCATTERDIV8SI:
41443 icode = CODE_FOR_avx512vl_scatterdiv8si;
41444 goto scatter_gen;
41445 case IX86_BUILTIN_SCATTERDIV4SI:
41446 icode = CODE_FOR_avx512vl_scatterdiv4si;
41447 goto scatter_gen;
41448 case IX86_BUILTIN_SCATTERDIV4DI:
41449 icode = CODE_FOR_avx512vl_scatterdiv4di;
41450 goto scatter_gen;
41451 case IX86_BUILTIN_SCATTERDIV2DI:
41452 icode = CODE_FOR_avx512vl_scatterdiv2di;
41453 goto scatter_gen;
41454 case IX86_BUILTIN_GATHERPFDPD:
41455 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
41456 goto vec_prefetch_gen;
41457 case IX86_BUILTIN_SCATTERALTSIV8DF:
41458 icode = CODE_FOR_avx512f_scattersiv8df;
41459 goto scatter_gen;
41460 case IX86_BUILTIN_SCATTERALTDIV16SF:
41461 icode = CODE_FOR_avx512f_scatterdiv16sf;
41462 goto scatter_gen;
41463 case IX86_BUILTIN_SCATTERALTSIV8DI:
41464 icode = CODE_FOR_avx512f_scattersiv8di;
41465 goto scatter_gen;
41466 case IX86_BUILTIN_SCATTERALTDIV16SI:
41467 icode = CODE_FOR_avx512f_scatterdiv16si;
41468 goto scatter_gen;
41469 case IX86_BUILTIN_GATHERPFDPS:
41470 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
41471 goto vec_prefetch_gen;
41472 case IX86_BUILTIN_GATHERPFQPD:
41473 icode = CODE_FOR_avx512pf_gatherpfv8didf;
41474 goto vec_prefetch_gen;
41475 case IX86_BUILTIN_GATHERPFQPS:
41476 icode = CODE_FOR_avx512pf_gatherpfv8disf;
41477 goto vec_prefetch_gen;
41478 case IX86_BUILTIN_SCATTERPFDPD:
41479 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
41480 goto vec_prefetch_gen;
41481 case IX86_BUILTIN_SCATTERPFDPS:
41482 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
41483 goto vec_prefetch_gen;
41484 case IX86_BUILTIN_SCATTERPFQPD:
41485 icode = CODE_FOR_avx512pf_scatterpfv8didf;
41486 goto vec_prefetch_gen;
41487 case IX86_BUILTIN_SCATTERPFQPS:
41488 icode = CODE_FOR_avx512pf_scatterpfv8disf;
41489 goto vec_prefetch_gen;
41490
41491 gather_gen:
41492 rtx half;
41493 rtx (*gen) (rtx, rtx);
41494
41495 arg0 = CALL_EXPR_ARG (exp, 0);
41496 arg1 = CALL_EXPR_ARG (exp, 1);
41497 arg2 = CALL_EXPR_ARG (exp, 2);
41498 arg3 = CALL_EXPR_ARG (exp, 3);
41499 arg4 = CALL_EXPR_ARG (exp, 4);
41500 op0 = expand_normal (arg0);
41501 op1 = expand_normal (arg1);
41502 op2 = expand_normal (arg2);
41503 op3 = expand_normal (arg3);
41504 op4 = expand_normal (arg4);
41505 /* Note the arg order is different from the operand order. */
41506 mode0 = insn_data[icode].operand[1].mode;
41507 mode2 = insn_data[icode].operand[3].mode;
41508 mode3 = insn_data[icode].operand[4].mode;
41509 mode4 = insn_data[icode].operand[5].mode;
41510
41511 if (target == NULL_RTX
41512 || GET_MODE (target) != insn_data[icode].operand[0].mode
41513 || !insn_data[icode].operand[0].predicate (target,
41514 GET_MODE (target)))
41515 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
41516 else
41517 subtarget = target;
41518
41519 switch (fcode)
41520 {
41521 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41522 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41523 half = gen_reg_rtx (V8SImode);
41524 if (!nonimmediate_operand (op2, V16SImode))
41525 op2 = copy_to_mode_reg (V16SImode, op2);
41526 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41527 op2 = half;
41528 break;
41529 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41530 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41531 case IX86_BUILTIN_GATHERALTSIV4DF:
41532 case IX86_BUILTIN_GATHERALTSIV4DI:
41533 half = gen_reg_rtx (V4SImode);
41534 if (!nonimmediate_operand (op2, V8SImode))
41535 op2 = copy_to_mode_reg (V8SImode, op2);
41536 emit_insn (gen_vec_extract_lo_v8si (half, op2));
41537 op2 = half;
41538 break;
41539 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41540 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41541 half = gen_reg_rtx (mode0);
41542 if (mode0 == V8SFmode)
41543 gen = gen_vec_extract_lo_v16sf;
41544 else
41545 gen = gen_vec_extract_lo_v16si;
41546 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41547 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41548 emit_insn (gen (half, op0));
41549 op0 = half;
41550 if (GET_MODE (op3) != VOIDmode)
41551 {
41552 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41553 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41554 emit_insn (gen (half, op3));
41555 op3 = half;
41556 }
41557 break;
41558 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41559 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41560 case IX86_BUILTIN_GATHERALTDIV8SF:
41561 case IX86_BUILTIN_GATHERALTDIV8SI:
41562 half = gen_reg_rtx (mode0);
41563 if (mode0 == V4SFmode)
41564 gen = gen_vec_extract_lo_v8sf;
41565 else
41566 gen = gen_vec_extract_lo_v8si;
41567 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41568 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41569 emit_insn (gen (half, op0));
41570 op0 = half;
41571 if (GET_MODE (op3) != VOIDmode)
41572 {
41573 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41574 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41575 emit_insn (gen (half, op3));
41576 op3 = half;
41577 }
41578 break;
41579 default:
41580 break;
41581 }
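/* Illustrative note (not from the original source): the *ALT* gather
   variants mix element widths, so one operand (either the index vector or
   the source/mask vector) has twice as many elements as are actually
   needed; the switch above extracts its low half with vec_extract_lo
   before the regular gather pattern is used.  */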
41582
41583 /* Force the memory operand to use only a base register here;
41584 we don't want to do that for the memory operands of other
41585 builtin functions. */
41586 op1 = ix86_zero_extend_to_Pmode (op1);
41587
41588 if (!insn_data[icode].operand[1].predicate (op0, mode0))
41589 op0 = copy_to_mode_reg (mode0, op0);
41590 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
41591 op1 = copy_to_mode_reg (Pmode, op1);
41592 if (!insn_data[icode].operand[3].predicate (op2, mode2))
41593 op2 = copy_to_mode_reg (mode2, op2);
41594
41595 op3 = fixup_modeless_constant (op3, mode3);
41596
41597 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
41598 {
41599 if (!insn_data[icode].operand[4].predicate (op3, mode3))
41600 op3 = copy_to_mode_reg (mode3, op3);
41601 }
41602 else
41603 {
41604 op3 = copy_to_reg (op3);
41605 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
41606 }
41607 if (!insn_data[icode].operand[5].predicate (op4, mode4))
41608 {
41609 error ("the last argument must be scale 1, 2, 4, 8");
41610 return const0_rtx;
41611 }
41612
41613 /* Optimize. If the mask is known to select every element (an all-ones
41614 integer mask, or a vector mask whose elements all have the sign bit
41615 set), replace op0 with pc_rtx to signal that the instruction
41616 overwrites the whole destination and doesn't use its previous contents. */
41617 if (optimize)
41618 {
41619 if (TREE_CODE (arg3) == INTEGER_CST)
41620 {
41621 if (integer_all_onesp (arg3))
41622 op0 = pc_rtx;
41623 }
41624 else if (TREE_CODE (arg3) == VECTOR_CST)
41625 {
41626 unsigned int negative = 0;
41627 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
41628 {
41629 tree cst = VECTOR_CST_ELT (arg3, i);
41630 if (TREE_CODE (cst) == INTEGER_CST
41631 && tree_int_cst_sign_bit (cst))
41632 negative++;
41633 else if (TREE_CODE (cst) == REAL_CST
41634 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
41635 negative++;
41636 }
41637 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
41638 op0 = pc_rtx;
41639 }
41640 else if (TREE_CODE (arg3) == SSA_NAME
41641 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
41642 {
41643 /* Recognize also when mask is like:
41644 __v2df src = _mm_setzero_pd ();
41645 __v2df mask = _mm_cmpeq_pd (src, src);
41646 or
41647 __v8sf src = _mm256_setzero_ps ();
41648 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
41649 as that is a cheaper way to load all ones into
41650 a register than having to load a constant from
41651 memory. */
41652 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
41653 if (is_gimple_call (def_stmt))
41654 {
41655 tree fndecl = gimple_call_fndecl (def_stmt);
41656 if (fndecl
41657 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
41658 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
41659 {
41660 case IX86_BUILTIN_CMPPD:
41661 case IX86_BUILTIN_CMPPS:
41662 case IX86_BUILTIN_CMPPD256:
41663 case IX86_BUILTIN_CMPPS256:
41664 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
41665 break;
41666 /* FALLTHRU */
41667 case IX86_BUILTIN_CMPEQPD:
41668 case IX86_BUILTIN_CMPEQPS:
41669 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
41670 && initializer_zerop (gimple_call_arg (def_stmt,
41671 1)))
41672 op0 = pc_rtx;
41673 break;
41674 default:
41675 break;
41676 }
41677 }
41678 }
41679 }
41680
41681 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
41682 if (! pat)
41683 return const0_rtx;
41684 emit_insn (pat);
41685
41686 switch (fcode)
41687 {
41688 case IX86_BUILTIN_GATHER3DIV16SF:
41689 if (target == NULL_RTX)
41690 target = gen_reg_rtx (V8SFmode);
41691 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
41692 break;
41693 case IX86_BUILTIN_GATHER3DIV16SI:
41694 if (target == NULL_RTX)
41695 target = gen_reg_rtx (V8SImode);
41696 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
41697 break;
41698 case IX86_BUILTIN_GATHER3DIV8SF:
41699 case IX86_BUILTIN_GATHERDIV8SF:
41700 if (target == NULL_RTX)
41701 target = gen_reg_rtx (V4SFmode);
41702 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
41703 break;
41704 case IX86_BUILTIN_GATHER3DIV8SI:
41705 case IX86_BUILTIN_GATHERDIV8SI:
41706 if (target == NULL_RTX)
41707 target = gen_reg_rtx (V4SImode);
41708 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
41709 break;
41710 default:
41711 target = subtarget;
41712 break;
41713 }
41714 return target;
41715
41716 scatter_gen:
41717 arg0 = CALL_EXPR_ARG (exp, 0);
41718 arg1 = CALL_EXPR_ARG (exp, 1);
41719 arg2 = CALL_EXPR_ARG (exp, 2);
41720 arg3 = CALL_EXPR_ARG (exp, 3);
41721 arg4 = CALL_EXPR_ARG (exp, 4);
41722 op0 = expand_normal (arg0);
41723 op1 = expand_normal (arg1);
41724 op2 = expand_normal (arg2);
41725 op3 = expand_normal (arg3);
41726 op4 = expand_normal (arg4);
41727 mode1 = insn_data[icode].operand[1].mode;
41728 mode2 = insn_data[icode].operand[2].mode;
41729 mode3 = insn_data[icode].operand[3].mode;
41730 mode4 = insn_data[icode].operand[4].mode;
41731
41732 /* The scatter instruction stores operand op3 to memory using
41733 indices from op2 and the scale from op4, under writemask op1.
41734 If the index operand op2 has more elements than the source
41735 operand op3, only its low half is used, and vice versa. */
41736 switch (fcode)
41737 {
41738 case IX86_BUILTIN_SCATTERALTSIV8DF:
41739 case IX86_BUILTIN_SCATTERALTSIV8DI:
41740 half = gen_reg_rtx (V8SImode);
41741 if (!nonimmediate_operand (op2, V16SImode))
41742 op2 = copy_to_mode_reg (V16SImode, op2);
41743 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41744 op2 = half;
41745 break;
41746 case IX86_BUILTIN_SCATTERALTDIV16SF:
41747 case IX86_BUILTIN_SCATTERALTDIV16SI:
41748 half = gen_reg_rtx (mode3);
41749 if (mode3 == V8SFmode)
41750 gen = gen_vec_extract_lo_v16sf;
41751 else
41752 gen = gen_vec_extract_lo_v16si;
41753 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41754 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41755 emit_insn (gen (half, op3));
41756 op3 = half;
41757 break;
41758 default:
41759 break;
41760 }
41761
41762 /* Force the memory operand to use only a base register here;
41763 we don't want to do that for the memory operands of other
41764 builtin functions. */
41765 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
41766
41767 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41768 op0 = copy_to_mode_reg (Pmode, op0);
41769
41770 op1 = fixup_modeless_constant (op1, mode1);
41771
41772 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
41773 {
41774 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41775 op1 = copy_to_mode_reg (mode1, op1);
41776 }
41777 else
41778 {
41779 op1 = copy_to_reg (op1);
41780 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
41781 }
41782
41783 if (!insn_data[icode].operand[2].predicate (op2, mode2))
41784 op2 = copy_to_mode_reg (mode2, op2);
41785
41786 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41787 op3 = copy_to_mode_reg (mode3, op3);
41788
41789 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41790 {
41791 error ("the last argument must be scale 1, 2, 4, 8");
41792 return const0_rtx;
41793 }
41794
41795 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41796 if (! pat)
41797 return const0_rtx;
41798
41799 emit_insn (pat);
41800 return 0;
41801
41802 vec_prefetch_gen:
41803 arg0 = CALL_EXPR_ARG (exp, 0);
41804 arg1 = CALL_EXPR_ARG (exp, 1);
41805 arg2 = CALL_EXPR_ARG (exp, 2);
41806 arg3 = CALL_EXPR_ARG (exp, 3);
41807 arg4 = CALL_EXPR_ARG (exp, 4);
41808 op0 = expand_normal (arg0);
41809 op1 = expand_normal (arg1);
41810 op2 = expand_normal (arg2);
41811 op3 = expand_normal (arg3);
41812 op4 = expand_normal (arg4);
41813 mode0 = insn_data[icode].operand[0].mode;
41814 mode1 = insn_data[icode].operand[1].mode;
41815 mode3 = insn_data[icode].operand[3].mode;
41816 mode4 = insn_data[icode].operand[4].mode;
41817
41818 op0 = fixup_modeless_constant (op0, mode0);
41819
41820 if (GET_MODE (op0) == mode0
41821 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
41822 {
41823 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41824 op0 = copy_to_mode_reg (mode0, op0);
41825 }
41826 else if (op0 != constm1_rtx)
41827 {
41828 op0 = copy_to_reg (op0);
41829 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41830 }
41831
41832 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41833 op1 = copy_to_mode_reg (mode1, op1);
41834
41835 /* Force the memory operand to use only a base register here. We
41836 don't want to do this for the memory operands of other builtin
41837 functions. */
41838 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
41839
41840 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
41841 op2 = copy_to_mode_reg (Pmode, op2);
41842
41843 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41844 {
41845 error ("the fourth argument must be scale 1, 2, 4, 8");
41846 return const0_rtx;
41847 }
41848
41849 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41850 {
41851 error ("incorrect hint operand");
41852 return const0_rtx;
41853 }
41854
41855 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41856 if (! pat)
41857 return const0_rtx;
41858
41859 emit_insn (pat);
41860
41861 return 0;
41862
41863 case IX86_BUILTIN_XABORT:
41864 icode = CODE_FOR_xabort;
41865 arg0 = CALL_EXPR_ARG (exp, 0);
41866 op0 = expand_normal (arg0);
41867 mode0 = insn_data[icode].operand[0].mode;
41868 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41869 {
41870 error ("the xabort's argument must be an 8-bit immediate");
41871 return const0_rtx;
41872 }
41873 emit_insn (gen_xabort (op0));
41874 return 0;
41875
41876 default:
41877 break;
41878 }
41879
41880 for (i = 0, d = bdesc_special_args;
41881 i < ARRAY_SIZE (bdesc_special_args);
41882 i++, d++)
41883 if (d->code == fcode)
41884 return ix86_expand_special_args_builtin (d, exp, target);
41885
41886 for (i = 0, d = bdesc_args;
41887 i < ARRAY_SIZE (bdesc_args);
41888 i++, d++)
41889 if (d->code == fcode)
41890 switch (fcode)
41891 {
41892 case IX86_BUILTIN_FABSQ:
41893 case IX86_BUILTIN_COPYSIGNQ:
41894 if (!TARGET_SSE)
41895 /* Emit a normal call if SSE isn't available. */
41896 return expand_call (exp, target, ignore);
41897 default:
41898 return ix86_expand_args_builtin (d, exp, target);
41899 }
41900
41901 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
41902 if (d->code == fcode)
41903 return ix86_expand_sse_comi (d, exp, target);
41904
41905 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
41906 if (d->code == fcode)
41907 return ix86_expand_round_builtin (d, exp, target);
41908
41909 for (i = 0, d = bdesc_pcmpestr;
41910 i < ARRAY_SIZE (bdesc_pcmpestr);
41911 i++, d++)
41912 if (d->code == fcode)
41913 return ix86_expand_sse_pcmpestr (d, exp, target);
41914
41915 for (i = 0, d = bdesc_pcmpistr;
41916 i < ARRAY_SIZE (bdesc_pcmpistr);
41917 i++, d++)
41918 if (d->code == fcode)
41919 return ix86_expand_sse_pcmpistr (d, exp, target);
41920
41921 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
41922 if (d->code == fcode)
41923 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
41924 (enum ix86_builtin_func_type)
41925 d->flag, d->comparison);
41926
41927 gcc_unreachable ();
41928 }
41929
41930 /* This returns the target-specific builtin with code CODE if
41931 current_function_decl has visibility on this builtin, which is checked
41932 using isa flags. Returns NULL_TREE otherwise. */
41933
41934 static tree ix86_get_builtin (enum ix86_builtins code)
41935 {
41936 struct cl_target_option *opts;
41937 tree target_tree = NULL_TREE;
41938
41939 /* Determine the isa flags of current_function_decl. */
41940
41941 if (current_function_decl)
41942 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
41943
41944 if (target_tree == NULL)
41945 target_tree = target_option_default_node;
41946
41947 opts = TREE_TARGET_OPTION (target_tree);
41948
41949 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
41950 return ix86_builtin_decl (code, true);
41951 else
41952 return NULL_TREE;
41953 }
41954
41955 /* Return the function decl for the target-specific builtin
41956 corresponding to the MPX builtin passed in FCODE. */
41957 static tree
41958 ix86_builtin_mpx_function (unsigned fcode)
41959 {
41960 switch (fcode)
41961 {
41962 case BUILT_IN_CHKP_BNDMK:
41963 return ix86_builtins[IX86_BUILTIN_BNDMK];
41964
41965 case BUILT_IN_CHKP_BNDSTX:
41966 return ix86_builtins[IX86_BUILTIN_BNDSTX];
41967
41968 case BUILT_IN_CHKP_BNDLDX:
41969 return ix86_builtins[IX86_BUILTIN_BNDLDX];
41970
41971 case BUILT_IN_CHKP_BNDCL:
41972 return ix86_builtins[IX86_BUILTIN_BNDCL];
41973
41974 case BUILT_IN_CHKP_BNDCU:
41975 return ix86_builtins[IX86_BUILTIN_BNDCU];
41976
41977 case BUILT_IN_CHKP_BNDRET:
41978 return ix86_builtins[IX86_BUILTIN_BNDRET];
41979
41980 case BUILT_IN_CHKP_INTERSECT:
41981 return ix86_builtins[IX86_BUILTIN_BNDINT];
41982
41983 case BUILT_IN_CHKP_NARROW:
41984 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
41985
41986 case BUILT_IN_CHKP_SIZEOF:
41987 return ix86_builtins[IX86_BUILTIN_SIZEOF];
41988
41989 case BUILT_IN_CHKP_EXTRACT_LOWER:
41990 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
41991
41992 case BUILT_IN_CHKP_EXTRACT_UPPER:
41993 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
41994
41995 default:
41996 return NULL_TREE;
41997 }
41998
41999 gcc_unreachable ();
42000 }
42001
42002 /* Helper function for ix86_load_bounds and ix86_store_bounds.
42003
42004 Return an address to be used to load/store bounds for pointer
42005 passed in SLOT.
42006
42007 SLOT_NO is an integer constant holding number of a target
42008 dependent special slot to be used in case SLOT is not a memory.
42009
42010 SPECIAL_BASE is a pointer to be used as a base of fake address
42011 to access special slots in Bounds Table. SPECIAL_BASE[-1],
42012 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
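/* As an illustration: with 64-bit pointers, slot number 0 maps to
SPECIAL_BASE - 8 and slot number 1 to SPECIAL_BASE - 16, following
addr = SPECIAL_BASE - (SLOT_NO + 1) * GET_MODE_SIZE (Pmode). */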
42013
42014 static rtx
42015 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
42016 {
42017 rtx addr = NULL;
42018
42019 /* NULL slot means we pass bounds for pointer not passed to the
42020 function at all. Register slot means we pass pointer in a
42021 register. In both these cases bounds are passed via Bounds
42022 Table. Since we do not have actual pointer stored in memory,
42023 we have to use fake addresses to access Bounds Table. We
42024 start with (special_base - sizeof (void*)) and decrease this
42025 address by pointer size to get addresses for other slots. */
42026 if (!slot || REG_P (slot))
42027 {
42028 gcc_assert (CONST_INT_P (slot_no));
42029 addr = plus_constant (Pmode, special_base,
42030 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
42031 }
42032 /* If the pointer is passed in memory, then its address is used to
42033 access the Bounds Table. */
42034 else if (MEM_P (slot))
42035 {
42036 addr = XEXP (slot, 0);
42037 if (!register_operand (addr, Pmode))
42038 addr = copy_addr_to_reg (addr);
42039 }
42040 else
42041 gcc_unreachable ();
42042
42043 return addr;
42044 }
42045
42046 /* Expand pass uses this hook to load bounds for function parameter
42047 PTR passed in SLOT in case its bounds are not passed in a register.
42048
42049 If SLOT is a memory, then bounds are loaded as for regular pointer
42050 loaded from memory. PTR may be NULL in case SLOT is a memory.
42051 In such case value of PTR (if required) may be loaded from SLOT.
42052
42053 If SLOT is NULL or a register then SLOT_NO is an integer constant
42054 holding number of the target dependent special slot which should be
42055 used to obtain bounds.
42056
42057 Return loaded bounds. */
42058
42059 static rtx
42060 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
42061 {
42062 rtx reg = gen_reg_rtx (BNDmode);
42063 rtx addr;
42064
42065 /* Get address to be used to access Bounds Table. Special slots start
42066 at the location of return address of the current function. */
42067 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
42068
42069 /* Load the pointer value from memory if we don't have it. */
42070 if (!ptr)
42071 {
42072 gcc_assert (MEM_P (slot));
42073 ptr = copy_addr_to_reg (slot);
42074 }
42075
42076 if (!register_operand (ptr, Pmode))
42077 ptr = ix86_zero_extend_to_Pmode (ptr);
42078
42079 emit_insn (BNDmode == BND64mode
42080 ? gen_bnd64_ldx (reg, addr, ptr)
42081 : gen_bnd32_ldx (reg, addr, ptr));
42082
42083 return reg;
42084 }
42085
42086 /* Expand pass uses this hook to store BOUNDS for call argument PTR
42087 passed in SLOT in case BOUNDS are not passed in a register.
42088
42089 If SLOT is a memory, then BOUNDS are stored as for regular pointer
42090 stored in memory. PTR may be NULL in case SLOT is a memory.
42091 In such case value of PTR (if required) may be loaded from SLOT.
42092
42093 If SLOT is NULL or a register then SLOT_NO is an integer constant
42094 holding number of the target dependent special slot which should be
42095 used to store BOUNDS. */
42096
42097 static void
42098 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
42099 {
42100 rtx addr;
42101
42102 /* Get address to be used to access Bounds Table. Special slots start
42103 at the location of return address of a called function. */
42104 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
42105
42106 /* Load the pointer value from memory if we don't have it. */
42107 if (!ptr)
42108 {
42109 gcc_assert (MEM_P (slot));
42110 ptr = copy_addr_to_reg (slot);
42111 }
42112
42113 if (!register_operand (ptr, Pmode))
42114 ptr = ix86_zero_extend_to_Pmode (ptr);
42115
42116 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
42117 if (!register_operand (bounds, BNDmode))
42118 bounds = copy_to_mode_reg (BNDmode, bounds);
42119
42120 emit_insn (BNDmode == BND64mode
42121 ? gen_bnd64_stx (addr, ptr, bounds)
42122 : gen_bnd32_stx (addr, ptr, bounds));
42123 }
42124
42125 /* Load and return bounds returned by function in SLOT. */
42126
42127 static rtx
42128 ix86_load_returned_bounds (rtx slot)
42129 {
42130 rtx res;
42131
42132 gcc_assert (REG_P (slot));
42133 res = gen_reg_rtx (BNDmode);
42134 emit_move_insn (res, slot);
42135
42136 return res;
42137 }
42138
42139 /* Store BOUNDS returned by function into SLOT. */
42140
42141 static void
42142 ix86_store_returned_bounds (rtx slot, rtx bounds)
42143 {
42144 gcc_assert (REG_P (slot));
42145 emit_move_insn (slot, bounds);
42146 }
42147
42148 /* Returns a function decl for a vectorized version of the combined function
42149 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
42150 if it is not available. */
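/* For example, a 16-lane single precision exp2 (CFN_EXP2 with V16SF
input and output) maps to IX86_BUILTIN_EXP2PS below, provided that
builtin is visible for the current ISA flags. */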
42151
42152 static tree
42153 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
42154 tree type_in)
42155 {
42156 machine_mode in_mode, out_mode;
42157 int in_n, out_n;
42158
42159 if (TREE_CODE (type_out) != VECTOR_TYPE
42160 || TREE_CODE (type_in) != VECTOR_TYPE)
42161 return NULL_TREE;
42162
42163 out_mode = TYPE_MODE (TREE_TYPE (type_out));
42164 out_n = TYPE_VECTOR_SUBPARTS (type_out);
42165 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42166 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42167
42168 switch (fn)
42169 {
42170 CASE_CFN_EXP2:
42171 if (out_mode == SFmode && in_mode == SFmode)
42172 {
42173 if (out_n == 16 && in_n == 16)
42174 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
42175 }
42176 break;
42177
42178 CASE_CFN_IFLOOR:
42179 CASE_CFN_LFLOOR:
42180 CASE_CFN_LLFLOOR:
42181 /* The round insn does not trap on denormals. */
42182 if (flag_trapping_math || !TARGET_ROUND)
42183 break;
42184
42185 if (out_mode == SImode && in_mode == DFmode)
42186 {
42187 if (out_n == 4 && in_n == 2)
42188 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
42189 else if (out_n == 8 && in_n == 4)
42190 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
42191 else if (out_n == 16 && in_n == 8)
42192 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
42193 }
42194 if (out_mode == SImode && in_mode == SFmode)
42195 {
42196 if (out_n == 4 && in_n == 4)
42197 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
42198 else if (out_n == 8 && in_n == 8)
42199 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
42200 }
42201 break;
42202
42203 CASE_CFN_ICEIL:
42204 CASE_CFN_LCEIL:
42205 CASE_CFN_LLCEIL:
42206 /* The round insn does not trap on denormals. */
42207 if (flag_trapping_math || !TARGET_ROUND)
42208 break;
42209
42210 if (out_mode == SImode && in_mode == DFmode)
42211 {
42212 if (out_n == 4 && in_n == 2)
42213 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
42214 else if (out_n == 8 && in_n == 4)
42215 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
42216 else if (out_n == 16 && in_n == 8)
42217 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
42218 }
42219 if (out_mode == SImode && in_mode == SFmode)
42220 {
42221 if (out_n == 4 && in_n == 4)
42222 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
42223 else if (out_n == 8 && in_n == 8)
42224 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
42225 }
42226 break;
42227
42228 CASE_CFN_IRINT:
42229 CASE_CFN_LRINT:
42230 CASE_CFN_LLRINT:
42231 if (out_mode == SImode && in_mode == DFmode)
42232 {
42233 if (out_n == 4 && in_n == 2)
42234 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
42235 else if (out_n == 8 && in_n == 4)
42236 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
42237 }
42238 if (out_mode == SImode && in_mode == SFmode)
42239 {
42240 if (out_n == 4 && in_n == 4)
42241 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
42242 else if (out_n == 8 && in_n == 8)
42243 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
42244 }
42245 break;
42246
42247 CASE_CFN_IROUND:
42248 CASE_CFN_LROUND:
42249 CASE_CFN_LLROUND:
42250 /* The round insn does not trap on denormals. */
42251 if (flag_trapping_math || !TARGET_ROUND)
42252 break;
42253
42254 if (out_mode == SImode && in_mode == DFmode)
42255 {
42256 if (out_n == 4 && in_n == 2)
42257 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
42258 else if (out_n == 8 && in_n == 4)
42259 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
42260 else if (out_n == 16 && in_n == 8)
42261 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
42262 }
42263 if (out_mode == SImode && in_mode == SFmode)
42264 {
42265 if (out_n == 4 && in_n == 4)
42266 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
42267 else if (out_n == 8 && in_n == 8)
42268 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
42269 }
42270 break;
42271
42272 CASE_CFN_FLOOR:
42273 /* The round insn does not trap on denormals. */
42274 if (flag_trapping_math || !TARGET_ROUND)
42275 break;
42276
42277 if (out_mode == DFmode && in_mode == DFmode)
42278 {
42279 if (out_n == 2 && in_n == 2)
42280 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
42281 else if (out_n == 4 && in_n == 4)
42282 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
42283 }
42284 if (out_mode == SFmode && in_mode == SFmode)
42285 {
42286 if (out_n == 4 && in_n == 4)
42287 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
42288 else if (out_n == 8 && in_n == 8)
42289 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
42290 }
42291 break;
42292
42293 CASE_CFN_CEIL:
42294 /* The round insn does not trap on denormals. */
42295 if (flag_trapping_math || !TARGET_ROUND)
42296 break;
42297
42298 if (out_mode == DFmode && in_mode == DFmode)
42299 {
42300 if (out_n == 2 && in_n == 2)
42301 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
42302 else if (out_n == 4 && in_n == 4)
42303 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
42304 }
42305 if (out_mode == SFmode && in_mode == SFmode)
42306 {
42307 if (out_n == 4 && in_n == 4)
42308 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
42309 else if (out_n == 8 && in_n == 8)
42310 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
42311 }
42312 break;
42313
42314 CASE_CFN_TRUNC:
42315 /* The round insn does not trap on denormals. */
42316 if (flag_trapping_math || !TARGET_ROUND)
42317 break;
42318
42319 if (out_mode == DFmode && in_mode == DFmode)
42320 {
42321 if (out_n == 2 && in_n == 2)
42322 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
42323 else if (out_n == 4 && in_n == 4)
42324 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
42325 }
42326 if (out_mode == SFmode && in_mode == SFmode)
42327 {
42328 if (out_n == 4 && in_n == 4)
42329 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
42330 else if (out_n == 8 && in_n == 8)
42331 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
42332 }
42333 break;
42334
42335 CASE_CFN_RINT:
42336 /* The round insn does not trap on denormals. */
42337 if (flag_trapping_math || !TARGET_ROUND)
42338 break;
42339
42340 if (out_mode == DFmode && in_mode == DFmode)
42341 {
42342 if (out_n == 2 && in_n == 2)
42343 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
42344 else if (out_n == 4 && in_n == 4)
42345 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
42346 }
42347 if (out_mode == SFmode && in_mode == SFmode)
42348 {
42349 if (out_n == 4 && in_n == 4)
42350 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
42351 else if (out_n == 8 && in_n == 8)
42352 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
42353 }
42354 break;
42355
42356 CASE_CFN_FMA:
42357 if (out_mode == DFmode && in_mode == DFmode)
42358 {
42359 if (out_n == 2 && in_n == 2)
42360 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
42361 if (out_n == 4 && in_n == 4)
42362 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
42363 }
42364 if (out_mode == SFmode && in_mode == SFmode)
42365 {
42366 if (out_n == 4 && in_n == 4)
42367 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
42368 if (out_n == 8 && in_n == 8)
42369 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
42370 }
42371 break;
42372
42373 default:
42374 break;
42375 }
42376
42377 /* Dispatch to a handler for a vectorization library. */
42378 if (ix86_veclib_handler)
42379 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
42380
42381 return NULL_TREE;
42382 }
42383
42384 /* Handler for an SVML-style interface to
42385 a library with vectorized intrinsics. */
42386
42387 static tree
42388 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
42389 {
42390 char name[20];
42391 tree fntype, new_fndecl, args;
42392 unsigned arity;
42393 const char *bname;
42394 machine_mode el_mode, in_mode;
42395 int n, in_n;
42396
42397 /* The SVML is suitable for unsafe math only. */
42398 if (!flag_unsafe_math_optimizations)
42399 return NULL_TREE;
42400
42401 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42402 n = TYPE_VECTOR_SUBPARTS (type_out);
42403 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42404 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42405 if (el_mode != in_mode
42406 || n != in_n)
42407 return NULL_TREE;
42408
42409 switch (fn)
42410 {
42411 CASE_CFN_EXP:
42412 CASE_CFN_LOG:
42413 CASE_CFN_LOG10:
42414 CASE_CFN_POW:
42415 CASE_CFN_TANH:
42416 CASE_CFN_TAN:
42417 CASE_CFN_ATAN:
42418 CASE_CFN_ATAN2:
42419 CASE_CFN_ATANH:
42420 CASE_CFN_CBRT:
42421 CASE_CFN_SINH:
42422 CASE_CFN_SIN:
42423 CASE_CFN_ASINH:
42424 CASE_CFN_ASIN:
42425 CASE_CFN_COSH:
42426 CASE_CFN_COS:
42427 CASE_CFN_ACOSH:
42428 CASE_CFN_ACOS:
42429 if ((el_mode != DFmode || n != 2)
42430 && (el_mode != SFmode || n != 4))
42431 return NULL_TREE;
42432 break;
42433
42434 default:
42435 return NULL_TREE;
42436 }
42437
42438 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42439 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
42440
42441 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
42442 strcpy (name, "vmlsLn4");
42443 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
42444 strcpy (name, "vmldLn2");
42445 else if (n == 4)
42446 {
42447 sprintf (name, "vmls%s", bname+10);
42448 name[strlen (name)-1] = '4';
42449 }
42450 else
42451 sprintf (name, "vmld%s2", bname+10);
42452
42453 /* Convert to uppercase. */
42454 name[4] &= ~0x20;
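/* Worked example: for a 4-wide SFmode sinf, bname is "__builtin_sinf",
so the code above first builds "vmlssinf", then replaces the trailing
'f' with '4' and upcases name[4], yielding the entry point "vmlsSin4". */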
42455
42456 arity = 0;
42457 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42458 arity++;
42459
42460 if (arity == 1)
42461 fntype = build_function_type_list (type_out, type_in, NULL);
42462 else
42463 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42464
42465 /* Build a function declaration for the vectorized function. */
42466 new_fndecl = build_decl (BUILTINS_LOCATION,
42467 FUNCTION_DECL, get_identifier (name), fntype);
42468 TREE_PUBLIC (new_fndecl) = 1;
42469 DECL_EXTERNAL (new_fndecl) = 1;
42470 DECL_IS_NOVOPS (new_fndecl) = 1;
42471 TREE_READONLY (new_fndecl) = 1;
42472
42473 return new_fndecl;
42474 }
42475
42476 /* Handler for an ACML-style interface to
42477 a library with vectorized intrinsics. */
42478
42479 static tree
42480 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
42481 {
42482 char name[20] = "__vr.._";
42483 tree fntype, new_fndecl, args;
42484 unsigned arity;
42485 const char *bname;
42486 machine_mode el_mode, in_mode;
42487 int n, in_n;
42488
42489 /* The ACML library is 64-bit only and suitable for unsafe math only,
42490 as it does not correctly support parts of IEEE (such as denormals)
42491 with the required precision. */
42492 if (!TARGET_64BIT
42493 || !flag_unsafe_math_optimizations)
42494 return NULL_TREE;
42495
42496 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42497 n = TYPE_VECTOR_SUBPARTS (type_out);
42498 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42499 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42500 if (el_mode != in_mode
42501 || n != in_n)
42502 return NULL_TREE;
42503
42504 switch (fn)
42505 {
42506 CASE_CFN_SIN:
42507 CASE_CFN_COS:
42508 CASE_CFN_EXP:
42509 CASE_CFN_LOG:
42510 CASE_CFN_LOG2:
42511 CASE_CFN_LOG10:
42512 if (el_mode == DFmode && n == 2)
42513 {
42514 name[4] = 'd';
42515 name[5] = '2';
42516 }
42517 else if (el_mode == SFmode && n == 4)
42518 {
42519 name[4] = 's';
42520 name[5] = '4';
42521 }
42522 else
42523 return NULL_TREE;
42524 break;
42525
42526 default:
42527 return NULL_TREE;
42528 }
42529
42530 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42531 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
42532 sprintf (name + 7, "%s", bname+10);
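/* Worked example: for a 2-wide DFmode sin, the template becomes
"__vrd2_" and bname+10 is "sin", so the resulting ACML entry point
is "__vrd2_sin". */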
42533
42534 arity = 0;
42535 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42536 arity++;
42537
42538 if (arity == 1)
42539 fntype = build_function_type_list (type_out, type_in, NULL);
42540 else
42541 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42542
42543 /* Build a function declaration for the vectorized function. */
42544 new_fndecl = build_decl (BUILTINS_LOCATION,
42545 FUNCTION_DECL, get_identifier (name), fntype);
42546 TREE_PUBLIC (new_fndecl) = 1;
42547 DECL_EXTERNAL (new_fndecl) = 1;
42548 DECL_IS_NOVOPS (new_fndecl) = 1;
42549 TREE_READONLY (new_fndecl) = 1;
42550
42551 return new_fndecl;
42552 }
42553
42554 /* Returns a decl of a function that implements gather load with
42555 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
42556 Return NULL_TREE if it is not available. */
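/* For example, a V2DF gather with an SImode index selects
IX86_BUILTIN_GATHERSIV2DF below on plain AVX2, and
IX86_BUILTIN_GATHER3SIV2DF when AVX512VL is available. */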
42557
42558 static tree
42559 ix86_vectorize_builtin_gather (const_tree mem_vectype,
42560 const_tree index_type, int scale)
42561 {
42562 bool si;
42563 enum ix86_builtins code;
42564
42565 if (! TARGET_AVX2)
42566 return NULL_TREE;
42567
42568 if ((TREE_CODE (index_type) != INTEGER_TYPE
42569 && !POINTER_TYPE_P (index_type))
42570 || (TYPE_MODE (index_type) != SImode
42571 && TYPE_MODE (index_type) != DImode))
42572 return NULL_TREE;
42573
42574 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42575 return NULL_TREE;
42576
42577 /* v*gather* insn sign extends index to pointer mode. */
42578 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42579 && TYPE_UNSIGNED (index_type))
42580 return NULL_TREE;
42581
42582 if (scale <= 0
42583 || scale > 8
42584 || (scale & (scale - 1)) != 0)
42585 return NULL_TREE;
42586
42587 si = TYPE_MODE (index_type) == SImode;
42588 switch (TYPE_MODE (mem_vectype))
42589 {
42590 case V2DFmode:
42591 if (TARGET_AVX512VL)
42592 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
42593 else
42594 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
42595 break;
42596 case V4DFmode:
42597 if (TARGET_AVX512VL)
42598 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
42599 else
42600 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
42601 break;
42602 case V2DImode:
42603 if (TARGET_AVX512VL)
42604 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
42605 else
42606 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
42607 break;
42608 case V4DImode:
42609 if (TARGET_AVX512VL)
42610 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
42611 else
42612 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
42613 break;
42614 case V4SFmode:
42615 if (TARGET_AVX512VL)
42616 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
42617 else
42618 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
42619 break;
42620 case V8SFmode:
42621 if (TARGET_AVX512VL)
42622 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
42623 else
42624 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
42625 break;
42626 case V4SImode:
42627 if (TARGET_AVX512VL)
42628 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
42629 else
42630 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
42631 break;
42632 case V8SImode:
42633 if (TARGET_AVX512VL)
42634 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
42635 else
42636 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
42637 break;
42638 case V8DFmode:
42639 if (TARGET_AVX512F)
42640 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
42641 else
42642 return NULL_TREE;
42643 break;
42644 case V8DImode:
42645 if (TARGET_AVX512F)
42646 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
42647 else
42648 return NULL_TREE;
42649 break;
42650 case V16SFmode:
42651 if (TARGET_AVX512F)
42652 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
42653 else
42654 return NULL_TREE;
42655 break;
42656 case V16SImode:
42657 if (TARGET_AVX512F)
42658 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
42659 else
42660 return NULL_TREE;
42661 break;
42662 default:
42663 return NULL_TREE;
42664 }
42665
42666 return ix86_get_builtin (code);
42667 }
42668
42669 /* Returns a decl of a function that implements scatter store with
42670 register type VECTYPE and index type INDEX_TYPE and SCALE.
42671 Return NULL_TREE if it is not available. */
42672
42673 static tree
42674 ix86_vectorize_builtin_scatter (const_tree vectype,
42675 const_tree index_type, int scale)
42676 {
42677 bool si;
42678 enum ix86_builtins code;
42679
42680 if (!TARGET_AVX512F)
42681 return NULL_TREE;
42682
42683 if ((TREE_CODE (index_type) != INTEGER_TYPE
42684 && !POINTER_TYPE_P (index_type))
42685 || (TYPE_MODE (index_type) != SImode
42686 && TYPE_MODE (index_type) != DImode))
42687 return NULL_TREE;
42688
42689 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42690 return NULL_TREE;
42691
42692 /* v*scatter* insn sign extends index to pointer mode. */
42693 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42694 && TYPE_UNSIGNED (index_type))
42695 return NULL_TREE;
42696
42697 /* Scale can be 1, 2, 4 or 8. */
42698 if (scale <= 0
42699 || scale > 8
42700 || (scale & (scale - 1)) != 0)
42701 return NULL_TREE;
42702
42703 si = TYPE_MODE (index_type) == SImode;
42704 switch (TYPE_MODE (vectype))
42705 {
42706 case V8DFmode:
42707 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
42708 break;
42709 case V8DImode:
42710 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
42711 break;
42712 case V16SFmode:
42713 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
42714 break;
42715 case V16SImode:
42716 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
42717 break;
42718 default:
42719 return NULL_TREE;
42720 }
42721
42722 return ix86_builtins[code];
42723 }
42724
42725 /* Return true if it is safe to use the rsqrt optabs to optimize
42726 1.0/sqrt. */
42727
42728 static bool
42729 use_rsqrt_p ()
42730 {
42731 return (TARGET_SSE_MATH
42732 && flag_finite_math_only
42733 && !flag_trapping_math
42734 && flag_unsafe_math_optimizations);
42735 }
42736
42737 /* Returns the decl of a target-specific builtin that implements the
42738 reciprocal of the function, or NULL_TREE if not available. */
42739
42740 static tree
42741 ix86_builtin_reciprocal (tree fndecl)
42742 {
42743 switch (DECL_FUNCTION_CODE (fndecl))
42744 {
42745 /* Vectorized version of sqrt to rsqrt conversion. */
42746 case IX86_BUILTIN_SQRTPS_NR:
42747 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
42748
42749 case IX86_BUILTIN_SQRTPS_NR256:
42750 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
42751
42752 default:
42753 return NULL_TREE;
42754 }
42755 }
42756 \f
42757 /* Helper for avx_vpermilps256_operand et al. This is also used by
42758 the expansion functions to turn the parallel back into a mask.
42759 The return value is 0 for no match and the imm8+1 for a match. */
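/* Worked example: for V4SFmode, the parallel (0 3 2 1) packs two bits
per element into the immediate, 0 | 3<<2 | 2<<4 | 1<<6 = 0x6c, so the
function returns 0x6d (imm8 + 1). */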
42760
42761 int
42762 avx_vpermilp_parallel (rtx par, machine_mode mode)
42763 {
42764 unsigned i, nelt = GET_MODE_NUNITS (mode);
42765 unsigned mask = 0;
42766 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
42767
42768 if (XVECLEN (par, 0) != (int) nelt)
42769 return 0;
42770
42771 /* Validate that all of the elements are constants, and not totally
42772 out of range. Copy the data into an integral array to make the
42773 subsequent checks easier. */
42774 for (i = 0; i < nelt; ++i)
42775 {
42776 rtx er = XVECEXP (par, 0, i);
42777 unsigned HOST_WIDE_INT ei;
42778
42779 if (!CONST_INT_P (er))
42780 return 0;
42781 ei = INTVAL (er);
42782 if (ei >= nelt)
42783 return 0;
42784 ipar[i] = ei;
42785 }
42786
42787 switch (mode)
42788 {
42789 case V8DFmode:
42790 /* In the 512-bit DFmode case, we can only move elements within
42791 a 128-bit lane. First fill the second part of the mask,
42792 then fallthru. */
42793 for (i = 4; i < 6; ++i)
42794 {
42795 if (ipar[i] < 4 || ipar[i] >= 6)
42796 return 0;
42797 mask |= (ipar[i] - 4) << i;
42798 }
42799 for (i = 6; i < 8; ++i)
42800 {
42801 if (ipar[i] < 6)
42802 return 0;
42803 mask |= (ipar[i] - 6) << i;
42804 }
42805 /* FALLTHRU */
42806
42807 case V4DFmode:
42808 /* In the 256-bit DFmode case, we can only move elements within
42809 a 128-bit lane. */
42810 for (i = 0; i < 2; ++i)
42811 {
42812 if (ipar[i] >= 2)
42813 return 0;
42814 mask |= ipar[i] << i;
42815 }
42816 for (i = 2; i < 4; ++i)
42817 {
42818 if (ipar[i] < 2)
42819 return 0;
42820 mask |= (ipar[i] - 2) << i;
42821 }
42822 break;
42823
42824 case V16SFmode:
42825 /* In 512 bit SFmode case, permutation in the upper 256 bits
42826 must mirror the permutation in the lower 256-bits. */
42827 for (i = 0; i < 8; ++i)
42828 if (ipar[i] + 8 != ipar[i + 8])
42829 return 0;
42830 /* FALLTHRU */
42831
42832 case V8SFmode:
42833 /* In 256 bit SFmode case, we have full freedom of
42834 movement within the low 128-bit lane, but the high 128-bit
42835 lane must mirror the exact same pattern. */
42836 for (i = 0; i < 4; ++i)
42837 if (ipar[i] + 4 != ipar[i + 4])
42838 return 0;
42839 nelt = 4;
42840 /* FALLTHRU */
42841
42842 case V2DFmode:
42843 case V4SFmode:
42844 /* In the 128-bit case, we've full freedom in the placement of
42845 the elements from the source operand. */
42846 for (i = 0; i < nelt; ++i)
42847 mask |= ipar[i] << (i * (nelt / 2));
42848 break;
42849
42850 default:
42851 gcc_unreachable ();
42852 }
42853
42854 /* Make sure success has a non-zero value by adding one. */
42855 return mask + 1;
42856 }
42857
42858 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
42859 the expansion functions to turn the parallel back into a mask.
42860 The return value is 0 for no match and the imm8+1 for a match. */
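/* Worked example: for V4DFmode, the parallel (0 1 6 7) selects the low
128-bit lane of the first operand and the high lane of the second, so
the reconstructed immediate is 0x30 and the function returns 0x31. */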
42861
42862 int
42863 avx_vperm2f128_parallel (rtx par, machine_mode mode)
42864 {
42865 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
42866 unsigned mask = 0;
42867 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
42868
42869 if (XVECLEN (par, 0) != (int) nelt)
42870 return 0;
42871
42872 /* Validate that all of the elements are constants, and not totally
42873 out of range. Copy the data into an integral array to make the
42874 subsequent checks easier. */
42875 for (i = 0; i < nelt; ++i)
42876 {
42877 rtx er = XVECEXP (par, 0, i);
42878 unsigned HOST_WIDE_INT ei;
42879
42880 if (!CONST_INT_P (er))
42881 return 0;
42882 ei = INTVAL (er);
42883 if (ei >= 2 * nelt)
42884 return 0;
42885 ipar[i] = ei;
42886 }
42887
42888 /* Validate that the halves of the permute are halves. */
42889 for (i = 0; i < nelt2 - 1; ++i)
42890 if (ipar[i] + 1 != ipar[i + 1])
42891 return 0;
42892 for (i = nelt2; i < nelt - 1; ++i)
42893 if (ipar[i] + 1 != ipar[i + 1])
42894 return 0;
42895
42896 /* Reconstruct the mask. */
42897 for (i = 0; i < 2; ++i)
42898 {
42899 unsigned e = ipar[i * nelt2];
42900 if (e % nelt2)
42901 return 0;
42902 e /= nelt2;
42903 mask |= e << (i * 4);
42904 }
42905
42906 /* Make sure success has a non-zero value by adding one. */
42907 return mask + 1;
42908 }
42909 \f
42910 /* Return a register priority for hard reg REGNO. */
42911 static int
42912 ix86_register_priority (int hard_regno)
42913 {
42914 /* ebp and r13 as the base always want a displacement, and r12 as the
42915 base always wants an index. So discourage their usage in an
42916 address. */
42917 if (hard_regno == R12_REG || hard_regno == R13_REG)
42918 return 0;
42919 if (hard_regno == BP_REG)
42920 return 1;
42921 /* New x86-64 int registers result in bigger code size. Discourage
42922 them. */
42923 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
42924 return 2;
42925 /* New x86-64 SSE registers result in bigger code size. Discourage
42926 them. */
42927 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
42928 return 2;
42929 /* Usage of AX register results in smaller code. Prefer it. */
42930 if (hard_regno == AX_REG)
42931 return 4;
42932 return 3;
42933 }
42934
42935 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
42936
42937 Put float CONST_DOUBLE in the constant pool instead of fp regs.
42938 QImode must go into class Q_REGS.
42939 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
42940 movdf to do mem-to-mem moves through integer regs. */
42941
42942 static reg_class_t
42943 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
42944 {
42945 machine_mode mode = GET_MODE (x);
42946
42947 /* We're only allowed to return a subclass of CLASS. Many of the
42948 following checks fail for NO_REGS, so eliminate that early. */
42949 if (regclass == NO_REGS)
42950 return NO_REGS;
42951
42952 /* All classes can load zeros. */
42953 if (x == CONST0_RTX (mode))
42954 return regclass;
42955
42956 /* Force constants into memory if we are loading a (nonzero) constant into
42957 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
42958 instructions to load from a constant. */
42959 if (CONSTANT_P (x)
42960 && (MAYBE_MMX_CLASS_P (regclass)
42961 || MAYBE_SSE_CLASS_P (regclass)
42962 || MAYBE_MASK_CLASS_P (regclass)))
42963 return NO_REGS;
42964
42965 /* Prefer SSE regs only, if we can use them for math. */
42966 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
42967 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
42968
42969 /* Floating-point constants need more complex checks. */
42970 if (CONST_DOUBLE_P (x))
42971 {
42972 /* General regs can load everything. */
42973 if (reg_class_subset_p (regclass, GENERAL_REGS))
42974 return regclass;
42975
42976 /* Floats can load 0 and 1 plus some others. Note that we eliminated
42977 zero above. We only want to wind up preferring 80387 registers if
42978 we plan on doing computation with them. */
42979 if (TARGET_80387
42980 && standard_80387_constant_p (x) > 0)
42981 {
42982 /* Limit class to non-sse. */
42983 if (regclass == FLOAT_SSE_REGS)
42984 return FLOAT_REGS;
42985 if (regclass == FP_TOP_SSE_REGS)
42986 return FP_TOP_REG;
42987 if (regclass == FP_SECOND_SSE_REGS)
42988 return FP_SECOND_REG;
42989 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
42990 return regclass;
42991 }
42992
42993 return NO_REGS;
42994 }
42995
42996 /* Generally when we see PLUS here, it's the function invariant
42997 (plus soft-fp const_int). Which can only be computed into general
42998 regs. */
42999 if (GET_CODE (x) == PLUS)
43000 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
43001
43002 /* QImode constants are easy to load, but non-constant QImode data
43003 must go into Q_REGS. */
43004 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
43005 {
43006 if (reg_class_subset_p (regclass, Q_REGS))
43007 return regclass;
43008 if (reg_class_subset_p (Q_REGS, regclass))
43009 return Q_REGS;
43010 return NO_REGS;
43011 }
43012
43013 return regclass;
43014 }
43015
43016 /* Discourage putting floating-point values in SSE registers unless
43017 SSE math is being used, and likewise for the 387 registers. */
43018 static reg_class_t
43019 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
43020 {
43021 machine_mode mode = GET_MODE (x);
43022
43023 /* Restrict the output reload class to the register bank that we are doing
43024 math on. If we would like not to return a subset of CLASS, reject this
43025 alternative: if reload cannot do this, it will still use its choice. */
43026 mode = GET_MODE (x);
43027 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
43028 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
43029
43030 if (X87_FLOAT_MODE_P (mode))
43031 {
43032 if (regclass == FP_TOP_SSE_REGS)
43033 return FP_TOP_REG;
43034 else if (regclass == FP_SECOND_SSE_REGS)
43035 return FP_SECOND_REG;
43036 else
43037 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
43038 }
43039
43040 return regclass;
43041 }
43042
43043 static reg_class_t
43044 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
43045 machine_mode mode, secondary_reload_info *sri)
43046 {
43047 /* Double-word spills from general registers to non-offsettable memory
43048 references (zero-extended addresses) require special handling. */
43049 if (TARGET_64BIT
43050 && MEM_P (x)
43051 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
43052 && INTEGER_CLASS_P (rclass)
43053 && !offsettable_memref_p (x))
43054 {
43055 sri->icode = (in_p
43056 ? CODE_FOR_reload_noff_load
43057 : CODE_FOR_reload_noff_store);
43058 /* Add the cost of moving address to a temporary. */
43059 sri->extra_cost = 1;
43060
43061 return NO_REGS;
43062 }
43063
43064 /* QImode spills from non-QI registers require an
43065 intermediate register on 32-bit targets. */
43066 if (mode == QImode
43067 && (MAYBE_MASK_CLASS_P (rclass)
43068 || (!TARGET_64BIT && !in_p
43069 && INTEGER_CLASS_P (rclass)
43070 && MAYBE_NON_Q_CLASS_P (rclass))))
43071 {
43072 int regno;
43073
43074 if (REG_P (x))
43075 regno = REGNO (x);
43076 else
43077 regno = -1;
43078
43079 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
43080 regno = true_regnum (x);
43081
43082 /* Return Q_REGS if the operand is in memory. */
43083 if (regno == -1)
43084 return Q_REGS;
43085 }
43086
43087 /* This condition handles a corner case where an expression involving
43088 pointers gets vectorized. We're trying to use the address of a
43089 stack slot as a vector initializer.
43090
43091 (set (reg:V2DI 74 [ vect_cst_.2 ])
43092 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
43093
43094 Eventually frame gets turned into sp+offset like this:
43095
43096 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43097 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43098 (const_int 392 [0x188]))))
43099
43100 That later gets turned into:
43101
43102 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43103 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43104 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
43105
43106 We'll have the following reload recorded:
43107
43108 Reload 0: reload_in (DI) =
43109 (plus:DI (reg/f:DI 7 sp)
43110 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
43111 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43112 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
43113 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
43114 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43115 reload_reg_rtx: (reg:V2DI 22 xmm1)
43116
43117 Which isn't going to work since SSE instructions can't handle scalar
43118 additions. Returning GENERAL_REGS forces the addition into integer
43119 register and reload can handle subsequent reloads without problems. */
43120
43121 if (in_p && GET_CODE (x) == PLUS
43122 && SSE_CLASS_P (rclass)
43123 && SCALAR_INT_MODE_P (mode))
43124 return GENERAL_REGS;
43125
43126 return NO_REGS;
43127 }
43128
43129 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
43130
43131 static bool
43132 ix86_class_likely_spilled_p (reg_class_t rclass)
43133 {
43134 switch (rclass)
43135 {
43136 case AREG:
43137 case DREG:
43138 case CREG:
43139 case BREG:
43140 case AD_REGS:
43141 case SIREG:
43142 case DIREG:
43143 case SSE_FIRST_REG:
43144 case FP_TOP_REG:
43145 case FP_SECOND_REG:
43146 case BND_REGS:
43147 return true;
43148
43149 default:
43150 break;
43151 }
43152
43153 return false;
43154 }
43155
43156 /* If we are copying between general and FP registers, we need a memory
43157 location. The same is true for SSE and MMX registers.
43158
43159 To optimize register_move_cost performance, allow inline variant.
43160
43161 The macro can't work reliably when one of the CLASSES is a class containing
43162 registers from multiple units (SSE, MMX, integer). We avoid this by never
43163 combining those units in single alternative in the machine description.
43164 Ensure that this constraint holds to avoid unexpected surprises.
43165
43166 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
43167 enforce these sanity checks. */
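/* For instance, a TImode move between SSE_REGS and GENERAL_REGS always
goes through memory because it is wider than a word, and even a
word-sized move does if the tuning disables inter-unit moves. */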
43168
43169 static inline bool
43170 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43171 machine_mode mode, int strict)
43172 {
43173 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
43174 return false;
43175 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
43176 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
43177 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
43178 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
43179 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
43180 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
43181 {
43182 gcc_assert (!strict || lra_in_progress);
43183 return true;
43184 }
43185
43186 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
43187 return true;
43188
43189 /* Between mask and general, we have moves no larger than word size. */
43190 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
43191 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
43192 return true;
43193
43194 /* ??? This is a lie. We do have moves between mmx/general, and for
43195 mmx/sse2. But by saying we need secondary memory we discourage the
43196 register allocator from using the mmx registers unless needed. */
43197 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
43198 return true;
43199
43200 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43201 {
43202 /* SSE1 doesn't have any direct moves from other classes. */
43203 if (!TARGET_SSE2)
43204 return true;
43205
43206 /* If the target says that inter-unit moves are more expensive
43207 than moving through memory, then don't generate them. */
43208 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
43209 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
43210 return true;
43211
43212 /* Between SSE and general, we have moves no larger than word size. */
43213 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43214 return true;
43215 }
43216
43217 return false;
43218 }
43219
43220 bool
43221 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43222 machine_mode mode, int strict)
43223 {
43224 return inline_secondary_memory_needed (class1, class2, mode, strict);
43225 }
43226
43227 /* Implement the TARGET_CLASS_MAX_NREGS hook.
43228
43229 On the 80386, this is the size of MODE in words,
43230 except in the FP regs, where a single reg is always enough. */
43231
43232 static unsigned char
43233 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
43234 {
43235 if (MAYBE_INTEGER_CLASS_P (rclass))
43236 {
43237 if (mode == XFmode)
43238 return (TARGET_64BIT ? 2 : 3);
43239 else if (mode == XCmode)
43240 return (TARGET_64BIT ? 4 : 6);
43241 else
43242 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
43243 }
43244 else
43245 {
43246 if (COMPLEX_MODE_P (mode))
43247 return 2;
43248 else
43249 return 1;
43250 }
43251 }
43252
43253 /* Return true if the registers in CLASS cannot represent the change from
43254 modes FROM to TO. */
43255
43256 bool
43257 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
43258 enum reg_class regclass)
43259 {
43260 if (from == to)
43261 return false;
43262
43263 /* x87 registers can't do subreg at all, as all values are reformatted
43264 to extended precision. */
43265 if (MAYBE_FLOAT_CLASS_P (regclass))
43266 return true;
43267
43268 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
43269 {
43270 int from_size = GET_MODE_SIZE (from);
43271 int to_size = GET_MODE_SIZE (to);
43272
43273 /* Vector registers do not support QI or HImode loads. If we don't
43274 disallow a change to these modes, reload will assume it's ok to
43275 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
43276 the vec_dupv4hi pattern. */
43277 if (from_size < 4)
43278 return true;
43279
43280 /* Further, we cannot allow word_mode subregs of full vector modes.
43281 Otherwise the middle-end will assume it's ok to store to
43282 (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
43283 of the 128-bit register. However, after reload the subreg will
43284 be dropped leaving a plain DImode store. This is indistinguishable
43285 from a "normal" DImode move, and so we're justified to use movsd,
43286 which modifies the entire 128-bit register. */
43287 if (to_size == UNITS_PER_WORD && from_size > UNITS_PER_WORD)
43288 return true;
43289 }
43290
43291 return false;
43292 }
43293
43294 /* Return the cost of moving data of mode M between a
43295 register and memory. A value of 2 is the default; this cost is
43296 relative to those in `REGISTER_MOVE_COST'.
43297
43298 This function is used extensively by register_move_cost that is used to
43299 build tables at startup. Make it inline in this case.
43300 When IN is 2, return maximum of in and out move cost.
43301
43302 If moving between registers and memory is more expensive than
43303 between two registers, you should define this macro to express the
43304 relative cost.
43305
43306 Also model the increased cost of moving QImode registers in
43307 non-Q_REGS classes.
43308 */
43309 static inline int
43310 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
43311 int in)
43312 {
43313 int cost;
43314 if (FLOAT_CLASS_P (regclass))
43315 {
43316 int index;
43317 switch (mode)
43318 {
43319 case SFmode:
43320 index = 0;
43321 break;
43322 case DFmode:
43323 index = 1;
43324 break;
43325 case XFmode:
43326 index = 2;
43327 break;
43328 default:
43329 return 100;
43330 }
43331 if (in == 2)
43332 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
43333 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
43334 }
43335 if (SSE_CLASS_P (regclass))
43336 {
43337 int index;
43338 switch (GET_MODE_SIZE (mode))
43339 {
43340 case 4:
43341 index = 0;
43342 break;
43343 case 8:
43344 index = 1;
43345 break;
43346 case 16:
43347 index = 2;
43348 break;
43349 default:
43350 return 100;
43351 }
43352 if (in == 2)
43353 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
43354 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
43355 }
43356 if (MMX_CLASS_P (regclass))
43357 {
43358 int index;
43359 switch (GET_MODE_SIZE (mode))
43360 {
43361 case 4:
43362 index = 0;
43363 break;
43364 case 8:
43365 index = 1;
43366 break;
43367 default:
43368 return 100;
43369 }
43370 if (in == 2)
43371 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
43372 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
43373 }
43374 switch (GET_MODE_SIZE (mode))
43375 {
43376 case 1:
43377 if (Q_CLASS_P (regclass) || TARGET_64BIT)
43378 {
43379 if (!in)
43380 return ix86_cost->int_store[0];
43381 if (TARGET_PARTIAL_REG_DEPENDENCY
43382 && optimize_function_for_speed_p (cfun))
43383 cost = ix86_cost->movzbl_load;
43384 else
43385 cost = ix86_cost->int_load[0];
43386 if (in == 2)
43387 return MAX (cost, ix86_cost->int_store[0]);
43388 return cost;
43389 }
43390 else
43391 {
43392 if (in == 2)
43393 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
43394 if (in)
43395 return ix86_cost->movzbl_load;
43396 else
43397 return ix86_cost->int_store[0] + 4;
43398 }
43399 break;
43400 case 2:
43401 if (in == 2)
43402 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
43403 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
43404 default:
43405 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
43406 if (mode == TFmode)
43407 mode = XFmode;
43408 if (in == 2)
43409 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
43410 else if (in)
43411 cost = ix86_cost->int_load[2];
43412 else
43413 cost = ix86_cost->int_store[2];
43414 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
43415 }
43416 }
43417
43418 static int
43419 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
43420 bool in)
43421 {
43422 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
43423 }
43424
43425
43426 /* Return the cost of moving data from a register in class CLASS1 to
43427 one in class CLASS2.
43428
43429 It is not required that the cost always equal 2 when FROM is the same as TO;
43430 on some machines it is expensive to move between registers if they are not
43431 general registers. */
43432
43433 static int
43434 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
43435 reg_class_t class2_i)
43436 {
43437 enum reg_class class1 = (enum reg_class) class1_i;
43438 enum reg_class class2 = (enum reg_class) class2_i;
43439
43440 /* In case we require secondary memory, compute cost of the store followed
43441 by load. In order to avoid bad register allocation choices, we need
43442 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
43443
43444 if (inline_secondary_memory_needed (class1, class2, mode, 0))
43445 {
43446 int cost = 1;
43447
43448 cost += inline_memory_move_cost (mode, class1, 2);
43449 cost += inline_memory_move_cost (mode, class2, 2);
43450
43451 /* In the case of copying from a general purpose register we may emit
43452 multiple stores followed by a single load, causing a memory size
43453 mismatch stall. Count this as an arbitrarily high cost of 20. */
43454 if (targetm.class_max_nregs (class1, mode)
43455 > targetm.class_max_nregs (class2, mode))
43456 cost += 20;
43457
43458 /* In the case of FP/MMX moves, the registers actually overlap, and we
43459 have to switch modes in order to treat them differently. */
43460 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
43461 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
43462 cost += 20;
43463
43464 return cost;
43465 }
43466
43467 /* Moves between SSE/MMX and integer unit are expensive. */
43468 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
43469 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43470
43471 /* ??? By keeping returned value relatively high, we limit the number
43472 of moves between integer and MMX/SSE registers for all targets.
43473 Additionally, high value prevents problem with x86_modes_tieable_p(),
43474 where integer modes in MMX/SSE registers are not tieable
43475 because of missing QImode and HImode moves to, from or between
43476 MMX/SSE registers. */
43477 return MAX (8, ix86_cost->mmxsse_to_integer);
43478
43479 if (MAYBE_FLOAT_CLASS_P (class1))
43480 return ix86_cost->fp_move;
43481 if (MAYBE_SSE_CLASS_P (class1))
43482 return ix86_cost->sse_move;
43483 if (MAYBE_MMX_CLASS_P (class1))
43484 return ix86_cost->mmx_move;
43485 return 2;
43486 }
43487
43488 /* Return TRUE if hard register REGNO can hold a value of machine-mode
43489 MODE. */
43490
43491 bool
43492 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
43493 {
43494 /* Flags and only flags can only hold CCmode values. */
43495 if (CC_REGNO_P (regno))
43496 return GET_MODE_CLASS (mode) == MODE_CC;
43497 if (GET_MODE_CLASS (mode) == MODE_CC
43498 || GET_MODE_CLASS (mode) == MODE_RANDOM
43499 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
43500 return false;
43501 if (STACK_REGNO_P (regno))
43502 return VALID_FP_MODE_P (mode);
43503 if (MASK_REGNO_P (regno))
43504 return (VALID_MASK_REG_MODE (mode)
43505 || (TARGET_AVX512BW
43506 && VALID_MASK_AVX512BW_MODE (mode)));
43507 if (BND_REGNO_P (regno))
43508 return VALID_BND_REG_MODE (mode);
43509 if (SSE_REGNO_P (regno))
43510 {
43511 /* We implement the move patterns for all vector modes into and
43512 out of SSE registers, even when no operation instructions
43513 are available. */
43514
43515 /* For AVX-512 we allow, regardless of regno:
43516 - XI mode
43517 - any of 512-bit wide vector mode
43518 - any scalar mode. */
43519 if (TARGET_AVX512F
43520 && (mode == XImode
43521 || VALID_AVX512F_REG_MODE (mode)
43522 || VALID_AVX512F_SCALAR_MODE (mode)))
43523 return true;
43524
43525 /* TODO check for QI/HI scalars. */
43526 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
43527 if (TARGET_AVX512VL
43528 && (mode == OImode
43529 || mode == TImode
43530 || VALID_AVX256_REG_MODE (mode)
43531 || VALID_AVX512VL_128_REG_MODE (mode)))
43532 return true;
43533
43534 /* xmm16-xmm31 are only available for AVX-512. */
43535 if (EXT_REX_SSE_REGNO_P (regno))
43536 return false;
43537
43538 /* OImode and AVX modes are available only when AVX is enabled. */
43539 return ((TARGET_AVX
43540 && VALID_AVX256_REG_OR_OI_MODE (mode))
43541 || VALID_SSE_REG_MODE (mode)
43542 || VALID_SSE2_REG_MODE (mode)
43543 || VALID_MMX_REG_MODE (mode)
43544 || VALID_MMX_REG_MODE_3DNOW (mode));
43545 }
43546 if (MMX_REGNO_P (regno))
43547 {
43548 /* We implement the move patterns for 3DNOW modes even in MMX mode,
43549 so if the register is available at all, then we can move data of
43550 the given mode into or out of it. */
43551 return (VALID_MMX_REG_MODE (mode)
43552 || VALID_MMX_REG_MODE_3DNOW (mode));
43553 }
43554
43555 if (mode == QImode)
43556 {
43557 /* Take care for QImode values - they can be in non-QI regs,
43558 but then they do cause partial register stalls. */
43559 if (ANY_QI_REGNO_P (regno))
43560 return true;
43561 if (!TARGET_PARTIAL_REG_STALL)
43562 return true;
43563 /* LRA checks if the hard register is OK for the given mode.
43564 QImode values can live in non-QI regs, so we allow all
43565 registers here. */
43566 if (lra_in_progress)
43567 return true;
43568 return !can_create_pseudo_p ();
43569 }
43570 /* We handle both integers and floats in the general purpose registers. */
43571 else if (VALID_INT_MODE_P (mode))
43572 return true;
43573 else if (VALID_FP_MODE_P (mode))
43574 return true;
43575 else if (VALID_DFP_MODE_P (mode))
43576 return true;
43577 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
43578 on to use that value in smaller contexts, this can easily force a
43579 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
43580 supporting DImode, allow it. */
43581 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
43582 return true;
43583
43584 return false;
43585 }
43586
43587 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
43588 tieable integer mode. */
43589
43590 static bool
43591 ix86_tieable_integer_mode_p (machine_mode mode)
43592 {
43593 switch (mode)
43594 {
43595 case HImode:
43596 case SImode:
43597 return true;
43598
43599 case QImode:
43600 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
43601
43602 case DImode:
43603 return TARGET_64BIT;
43604
43605 default:
43606 return false;
43607 }
43608 }
43609
43610 /* Return true if MODE1 is accessible in a register that can hold MODE2
43611 without copying. That is, all register classes that can hold MODE2
43612 can also hold MODE1. */
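/* For example (illustrative): HImode ties with SImode, and two 16-byte
 vector modes such as V4SFmode and V2DImode tie, since both are
 acceptable only to SSE registers. */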
43613
43614 bool
43615 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
43616 {
43617 if (mode1 == mode2)
43618 return true;
43619
43620 if (ix86_tieable_integer_mode_p (mode1)
43621 && ix86_tieable_integer_mode_p (mode2))
43622 return true;
43623
43624 /* MODE2 being XFmode implies fp stack or general regs, which means we
43625 can tie any smaller floating point modes to it. Note that we do not
43626 tie this with TFmode. */
43627 if (mode2 == XFmode)
43628 return mode1 == SFmode || mode1 == DFmode;
43629
43630 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
43631 that we can tie it with SFmode. */
43632 if (mode2 == DFmode)
43633 return mode1 == SFmode;
43634
43635 /* If MODE2 is only appropriate for an SSE register, then tie with
43636 any other mode acceptable to SSE registers. */
43637 if (GET_MODE_SIZE (mode2) == 32
43638 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43639 return (GET_MODE_SIZE (mode1) == 32
43640 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43641 if (GET_MODE_SIZE (mode2) == 16
43642 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43643 return (GET_MODE_SIZE (mode1) == 16
43644 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43645
43646 /* If MODE2 is appropriate for an MMX register, then tie
43647 with any other mode acceptable to MMX registers. */
43648 if (GET_MODE_SIZE (mode2) == 8
43649 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
43650 return (GET_MODE_SIZE (mode1) == 8
43651 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
43652
43653 return false;
43654 }
43655
43656 /* Return the cost of moving between two registers of mode MODE. */
43657
43658 static int
43659 ix86_set_reg_reg_cost (machine_mode mode)
43660 {
43661 unsigned int units = UNITS_PER_WORD;
43662
43663 switch (GET_MODE_CLASS (mode))
43664 {
43665 default:
43666 break;
43667
43668 case MODE_CC:
43669 units = GET_MODE_SIZE (CCmode);
43670 break;
43671
43672 case MODE_FLOAT:
43673 if ((TARGET_SSE && mode == TFmode)
43674 || (TARGET_80387 && mode == XFmode)
43675 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
43676 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
43677 units = GET_MODE_SIZE (mode);
43678 break;
43679
43680 case MODE_COMPLEX_FLOAT:
43681 if ((TARGET_SSE && mode == TCmode)
43682 || (TARGET_80387 && mode == XCmode)
43683 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
43684 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
43685 units = GET_MODE_SIZE (mode);
43686 break;
43687
43688 case MODE_VECTOR_INT:
43689 case MODE_VECTOR_FLOAT:
43690 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43691 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43692 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43693 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43694 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
43695 units = GET_MODE_SIZE (mode);
43696 }
43697
43698 /* Return the cost of moving between two registers of mode MODE,
43699 assuming that the move will be in pieces of at most UNITS bytes. */
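 /* E.g. (illustrative, x86-64): a 32-byte vector mode such as V8SFmode
 with AVX enabled gives UNITS == 32 and hence COSTS_N_INSNS (1), while
 without AVX UNITS stays at UNITS_PER_WORD == 8 and the same mode costs
 COSTS_N_INSNS (4). */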
43700 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
43701 }
43702
43703 /* Compute a (partial) cost for rtx X. Return true if the complete
43704 cost has been computed, and false if subexpressions should be
43705 scanned. In either case, *TOTAL contains the cost result. */
43706
43707 static bool
43708 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
43709 int *total, bool speed)
43710 {
43711 rtx mask;
43712 enum rtx_code code = GET_CODE (x);
43713 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
43714 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
43715
43716 switch (code)
43717 {
43718 case SET:
43719 if (register_operand (SET_DEST (x), VOIDmode)
43720 && reg_or_0_operand (SET_SRC (x), VOIDmode))
43721 {
43722 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
43723 return true;
43724 }
43725 return false;
43726
43727 case CONST_INT:
43728 case CONST:
43729 case LABEL_REF:
43730 case SYMBOL_REF:
43731 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
43732 *total = 3;
43733 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
43734 *total = 2;
43735 else if (flag_pic && SYMBOLIC_CONST (x)
43736 && !(TARGET_64BIT
43737 && (GET_CODE (x) == LABEL_REF
43738 || (GET_CODE (x) == SYMBOL_REF
43739 && SYMBOL_REF_LOCAL_P (x))))
43740 /* Use 0 cost for CONST to improve its propagation. */
43741 && (TARGET_64BIT || GET_CODE (x) != CONST))
43742 *total = 1;
43743 else
43744 *total = 0;
43745 return true;
43746
43747 case CONST_WIDE_INT:
43748 *total = 0;
43749 return true;
43750
43751 case CONST_DOUBLE:
43752 switch (standard_80387_constant_p (x))
43753 {
43754 case 1: /* 0.0 */
43755 *total = 1;
43756 return true;
43757 default: /* Other constants */
43758 *total = 2;
43759 return true;
43760 case 0:
43761 case -1:
43762 break;
43763 }
43764 if (SSE_FLOAT_MODE_P (mode))
43765 {
43766 case CONST_VECTOR:
43767 switch (standard_sse_constant_p (x))
43768 {
43769 case 0:
43770 break;
43771 case 1: /* 0: xor eliminates false dependency */
43772 *total = 0;
43773 return true;
43774 default: /* -1: cmp contains false dependency */
43775 *total = 1;
43776 return true;
43777 }
43778 }
43779 /* Fall back to (MEM (SYMBOL_REF)), since that's where
43780 it'll probably end up. Add a penalty for size. */
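 /* E.g. (illustrative): an XFmode constant with 32-bit PIC costs
 COSTS_N_INSNS (1) + 1 + 2 by the formula below. */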
43781 *total = (COSTS_N_INSNS (1)
43782 + (flag_pic != 0 && !TARGET_64BIT)
43783 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
43784 return true;
43785
43786 case ZERO_EXTEND:
43787 /* Zero extension is often completely free on x86_64, so make
43788 it as cheap as possible. */
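 /* (A write to a 32-bit register implicitly zero-extends into the full
 64-bit register, so the SImode -> DImode zero_extend usually needs no
 extra insn.) */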
43789 if (TARGET_64BIT && mode == DImode
43790 && GET_MODE (XEXP (x, 0)) == SImode)
43791 *total = 1;
43792 else if (TARGET_ZERO_EXTEND_WITH_AND)
43793 *total = cost->add;
43794 else
43795 *total = cost->movzx;
43796 return false;
43797
43798 case SIGN_EXTEND:
43799 *total = cost->movsx;
43800 return false;
43801
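 /* Small constant left shifts can sometimes be done with LEA instead of
 a shift insn, e.g. (x << 2) roughly as "lea 0(,%eax,4),%edx"
 (illustrative). */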
43802 case ASHIFT:
43803 if (SCALAR_INT_MODE_P (mode)
43804 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
43805 && CONST_INT_P (XEXP (x, 1)))
43806 {
43807 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43808 if (value == 1)
43809 {
43810 *total = cost->add;
43811 return false;
43812 }
43813 if ((value == 2 || value == 3)
43814 && cost->lea <= cost->shift_const)
43815 {
43816 *total = cost->lea;
43817 return false;
43818 }
43819 }
43820 /* FALLTHRU */
43821
43822 case ROTATE:
43823 case ASHIFTRT:
43824 case LSHIFTRT:
43825 case ROTATERT:
43826 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43827 {
43828 /* ??? Should be SSE vector operation cost. */
43829 /* At least for published AMD latencies, this really is the same
43830 as the latency for a simple fpu operation like fabs. */
43831 /* V*QImode is emulated with 1-11 insns. */
43832 if (mode == V16QImode || mode == V32QImode)
43833 {
43834 int count = 11;
43835 if (TARGET_XOP && mode == V16QImode)
43836 {
43837 /* For XOP we use vpshab, which requires a broadcast of the
43838 shift value to the variable shift insn. For constants this
43839 means a V16QI constant in memory; even when we can perform the
43840 shift with one insn, set the cost to prefer paddb. */
43841 if (CONSTANT_P (XEXP (x, 1)))
43842 {
43843 *total = (cost->fabs
43844 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
43845 + (speed ? 2 : COSTS_N_BYTES (16)));
43846 return true;
43847 }
43848 count = 3;
43849 }
43850 else if (TARGET_SSSE3)
43851 count = 7;
43852 *total = cost->fabs * count;
43853 }
43854 else
43855 *total = cost->fabs;
43856 }
43857 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43858 {
43859 if (CONST_INT_P (XEXP (x, 1)))
43860 {
43861 if (INTVAL (XEXP (x, 1)) > 32)
43862 *total = cost->shift_const + COSTS_N_INSNS (2);
43863 else
43864 *total = cost->shift_const * 2;
43865 }
43866 else
43867 {
43868 if (GET_CODE (XEXP (x, 1)) == AND)
43869 *total = cost->shift_var * 2;
43870 else
43871 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
43872 }
43873 }
43874 else
43875 {
43876 if (CONST_INT_P (XEXP (x, 1)))
43877 *total = cost->shift_const;
43878 else if (SUBREG_P (XEXP (x, 1))
43879 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
43880 {
43881 /* The AND just truncates the shift count and is free, so return the variable shift cost. */
43882 *total = cost->shift_var;
43883 return true;
43884 }
43885 else
43886 *total = cost->shift_var;
43887 }
43888 return false;
43889
43890 case FMA:
43891 {
43892 rtx sub;
43893
43894 gcc_assert (FLOAT_MODE_P (mode));
43895 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
43896
43897 /* ??? SSE scalar/vector cost should be used here. */
43898 /* ??? Bald assumption that fma has the same cost as fmul. */
43899 *total = cost->fmul;
43900 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
43901
43902 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
43903 sub = XEXP (x, 0);
43904 if (GET_CODE (sub) == NEG)
43905 sub = XEXP (sub, 0);
43906 *total += rtx_cost (sub, mode, FMA, 0, speed);
43907
43908 sub = XEXP (x, 2);
43909 if (GET_CODE (sub) == NEG)
43910 sub = XEXP (sub, 0);
43911 *total += rtx_cost (sub, mode, FMA, 2, speed);
43912 return true;
43913 }
43914
43915 case MULT:
43916 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43917 {
43918 /* ??? SSE scalar cost should be used here. */
43919 *total = cost->fmul;
43920 return false;
43921 }
43922 else if (X87_FLOAT_MODE_P (mode))
43923 {
43924 *total = cost->fmul;
43925 return false;
43926 }
43927 else if (FLOAT_MODE_P (mode))
43928 {
43929 /* ??? SSE vector cost should be used here. */
43930 *total = cost->fmul;
43931 return false;
43932 }
43933 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43934 {
43935 /* V*QImode is emulated with 7-13 insns. */
43936 if (mode == V16QImode || mode == V32QImode)
43937 {
43938 int extra = 11;
43939 if (TARGET_XOP && mode == V16QImode)
43940 extra = 5;
43941 else if (TARGET_SSSE3)
43942 extra = 6;
43943 *total = cost->fmul * 2 + cost->fabs * extra;
43944 }
43945 /* V*DImode is emulated with 5-8 insns. */
43946 else if (mode == V2DImode || mode == V4DImode)
43947 {
43948 if (TARGET_XOP && mode == V2DImode)
43949 *total = cost->fmul * 2 + cost->fabs * 3;
43950 else
43951 *total = cost->fmul * 3 + cost->fabs * 5;
43952 }
43953 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
43954 insns, including two PMULUDQ. */
43955 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
43956 *total = cost->fmul * 2 + cost->fabs * 5;
43957 else
43958 *total = cost->fmul;
43959 return false;
43960 }
43961 else
43962 {
43963 rtx op0 = XEXP (x, 0);
43964 rtx op1 = XEXP (x, 1);
43965 int nbits;
43966 if (CONST_INT_P (XEXP (x, 1)))
43967 {
43968 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43969 for (nbits = 0; value != 0; value &= value - 1)
43970 nbits++;
43971 }
43972 else
43973 /* This is arbitrary. */
43974 nbits = 7;
43975
43976 /* Compute costs correctly for widening multiplication. */
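 /* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
 is a single widening imul, so it is costed as an SImode multiply
 below (illustrative). */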
43977 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
43978 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
43979 == GET_MODE_SIZE (mode))
43980 {
43981 int is_mulwiden = 0;
43982 machine_mode inner_mode = GET_MODE (op0);
43983
43984 if (GET_CODE (op0) == GET_CODE (op1))
43985 is_mulwiden = 1, op1 = XEXP (op1, 0);
43986 else if (CONST_INT_P (op1))
43987 {
43988 if (GET_CODE (op0) == SIGN_EXTEND)
43989 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
43990 == INTVAL (op1);
43991 else
43992 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
43993 }
43994
43995 if (is_mulwiden)
43996 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
43997 }
43998
43999 *total = (cost->mult_init[MODE_INDEX (mode)]
44000 + nbits * cost->mult_bit
44001 + rtx_cost (op0, mode, outer_code, opno, speed)
44002 + rtx_cost (op1, mode, outer_code, opno, speed));
44003
44004 return true;
44005 }
44006
44007 case DIV:
44008 case UDIV:
44009 case MOD:
44010 case UMOD:
44011 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44012 /* ??? SSE cost should be used here. */
44013 *total = cost->fdiv;
44014 else if (X87_FLOAT_MODE_P (mode))
44015 *total = cost->fdiv;
44016 else if (FLOAT_MODE_P (mode))
44017 /* ??? SSE vector cost should be used here. */
44018 *total = cost->fdiv;
44019 else
44020 *total = cost->divide[MODE_INDEX (mode)];
44021 return false;
44022
44023 case PLUS:
44024 if (GET_MODE_CLASS (mode) == MODE_INT
44025 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
44026 {
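 /* Address-like forms map to a single LEA, e.g. (illustrative)
 (plus (plus (mult X 4) Y) CST) -> "lea CST(Y,X,4), dest". */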
44027 if (GET_CODE (XEXP (x, 0)) == PLUS
44028 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
44029 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
44030 && CONSTANT_P (XEXP (x, 1)))
44031 {
44032 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
44033 if (val == 2 || val == 4 || val == 8)
44034 {
44035 *total = cost->lea;
44036 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44037 outer_code, opno, speed);
44038 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
44039 outer_code, opno, speed);
44040 *total += rtx_cost (XEXP (x, 1), mode,
44041 outer_code, opno, speed);
44042 return true;
44043 }
44044 }
44045 else if (GET_CODE (XEXP (x, 0)) == MULT
44046 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
44047 {
44048 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
44049 if (val == 2 || val == 4 || val == 8)
44050 {
44051 *total = cost->lea;
44052 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44053 outer_code, opno, speed);
44054 *total += rtx_cost (XEXP (x, 1), mode,
44055 outer_code, opno, speed);
44056 return true;
44057 }
44058 }
44059 else if (GET_CODE (XEXP (x, 0)) == PLUS)
44060 {
44061 *total = cost->lea;
44062 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44063 outer_code, opno, speed);
44064 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44065 outer_code, opno, speed);
44066 *total += rtx_cost (XEXP (x, 1), mode,
44067 outer_code, opno, speed);
44068 return true;
44069 }
44070 }
44071 /* FALLTHRU */
44072
44073 case MINUS:
44074 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44075 {
44076 /* ??? SSE cost should be used here. */
44077 *total = cost->fadd;
44078 return false;
44079 }
44080 else if (X87_FLOAT_MODE_P (mode))
44081 {
44082 *total = cost->fadd;
44083 return false;
44084 }
44085 else if (FLOAT_MODE_P (mode))
44086 {
44087 /* ??? SSE vector cost should be used here. */
44088 *total = cost->fadd;
44089 return false;
44090 }
44091 /* FALLTHRU */
44092
44093 case AND:
44094 case IOR:
44095 case XOR:
44096 if (GET_MODE_CLASS (mode) == MODE_INT
44097 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44098 {
44099 *total = (cost->add * 2
44100 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
44101 << (GET_MODE (XEXP (x, 0)) != DImode))
44102 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
44103 << (GET_MODE (XEXP (x, 1)) != DImode)));
44104 return true;
44105 }
44106 /* FALLTHRU */
44107
44108 case NEG:
44109 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44110 {
44111 /* ??? SSE cost should be used here. */
44112 *total = cost->fchs;
44113 return false;
44114 }
44115 else if (X87_FLOAT_MODE_P (mode))
44116 {
44117 *total = cost->fchs;
44118 return false;
44119 }
44120 else if (FLOAT_MODE_P (mode))
44121 {
44122 /* ??? SSE vector cost should be used here. */
44123 *total = cost->fchs;
44124 return false;
44125 }
44126 /* FALLTHRU */
44127
44128 case NOT:
44129 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
44130 {
44131 /* ??? Should be SSE vector operation cost. */
44132 /* At least for published AMD latencies, this really is the same
44133 as the latency for a simple fpu operation like fabs. */
44134 *total = cost->fabs;
44135 }
44136 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44137 *total = cost->add * 2;
44138 else
44139 *total = cost->add;
44140 return false;
44141
44142 case COMPARE:
44143 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
44144 && XEXP (XEXP (x, 0), 1) == const1_rtx
44145 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
44146 && XEXP (x, 1) == const0_rtx)
44147 {
44148 /* This kind of construct is implemented using test[bwl].
44149 Treat it as if we had an AND. */
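 /* E.g. (compare (zero_extract X 1 POS) 0) can be emitted as
 "test $(1<<POS), X" (illustrative). */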
44150 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
44151 *total = (cost->add
44152 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
44153 opno, speed)
44154 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
44155 return true;
44156 }
44157
44158 /* The embedded comparison operand is completely free. */
44159 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
44160 && XEXP (x, 1) == const0_rtx)
44161 *total = 0;
44162
44163 return false;
44164
44165 case FLOAT_EXTEND:
44166 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
44167 *total = 0;
44168 return false;
44169
44170 case ABS:
44171 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44172 /* ??? SSE cost should be used here. */
44173 *total = cost->fabs;
44174 else if (X87_FLOAT_MODE_P (mode))
44175 *total = cost->fabs;
44176 else if (FLOAT_MODE_P (mode))
44177 /* ??? SSE vector cost should be used here. */
44178 *total = cost->fabs;
44179 return false;
44180
44181 case SQRT:
44182 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44183 /* ??? SSE cost should be used here. */
44184 *total = cost->fsqrt;
44185 else if (X87_FLOAT_MODE_P (mode))
44186 *total = cost->fsqrt;
44187 else if (FLOAT_MODE_P (mode))
44188 /* ??? SSE vector cost should be used here. */
44189 *total = cost->fsqrt;
44190 return false;
44191
44192 case UNSPEC:
44193 if (XINT (x, 1) == UNSPEC_TP)
44194 *total = 0;
44195 return false;
44196
44197 case VEC_SELECT:
44198 case VEC_CONCAT:
44199 case VEC_DUPLICATE:
44200 /* ??? Assume all of these vector manipulation patterns are
44201 recognizable, in which case they all pretty much have the
44202 same cost. */
44203 *total = cost->fabs;
44204 return true;
44205 case VEC_MERGE:
44206 mask = XEXP (x, 2);
44207 /* This is a masked instruction; assume the same cost
44208 as the non-masked variant. */
44209 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
44210 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
44211 else
44212 *total = cost->fabs;
44213 return true;
44214
44215 default:
44216 return false;
44217 }
44218 }
44219
44220 #if TARGET_MACHO
44221
44222 static int current_machopic_label_num;
44223
44224 /* Given a symbol name and its associated stub, write out the
44225 definition of the stub. */
44226
44227 void
44228 machopic_output_stub (FILE *file, const char *symb, const char *stub)
44229 {
44230 unsigned int length;
44231 char *binder_name, *symbol_name, lazy_ptr_name[32];
44232 int label = ++current_machopic_label_num;
44233
44234 /* For 64-bit we shouldn't get here. */
44235 gcc_assert (!TARGET_64BIT);
44236
44237 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
44238 symb = targetm.strip_name_encoding (symb);
44239
44240 length = strlen (stub);
44241 binder_name = XALLOCAVEC (char, length + 32);
44242 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
44243
44244 length = strlen (symb);
44245 symbol_name = XALLOCAVEC (char, length + 32);
44246 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
44247
44248 sprintf (lazy_ptr_name, "L%d$lz", label);
44249
44250 if (MACHOPIC_ATT_STUB)
44251 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
44252 else if (MACHOPIC_PURE)
44253 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
44254 else
44255 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
44256
44257 fprintf (file, "%s:\n", stub);
44258 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44259
44260 if (MACHOPIC_ATT_STUB)
44261 {
44262 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
44263 }
44264 else if (MACHOPIC_PURE)
44265 {
44266 /* PIC stub. */
44267 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44268 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
44269 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
44270 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
44271 label, lazy_ptr_name, label);
44272 fprintf (file, "\tjmp\t*%%ecx\n");
44273 }
44274 else
44275 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
44276
44277 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
44278 it needs no stub-binding-helper. */
44279 if (MACHOPIC_ATT_STUB)
44280 return;
44281
44282 fprintf (file, "%s:\n", binder_name);
44283
44284 if (MACHOPIC_PURE)
44285 {
44286 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
44287 fprintf (file, "\tpushl\t%%ecx\n");
44288 }
44289 else
44290 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
44291
44292 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
44293
44294 /* N.B. Keep the correspondence of these
44295 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
44296 old-pic/new-pic/non-pic stubs; altering this will break
44297 compatibility with existing dylibs. */
44298 if (MACHOPIC_PURE)
44299 {
44300 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44301 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
44302 }
44303 else
44304 /* 16-byte -mdynamic-no-pic stub. */
44305 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
44306
44307 fprintf (file, "%s:\n", lazy_ptr_name);
44308 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44309 fprintf (file, ASM_LONG "%s\n", binder_name);
44310 }
44311 #endif /* TARGET_MACHO */
44312
44313 /* Order the registers for the register allocator. */
44314
44315 void
44316 x86_order_regs_for_local_alloc (void)
44317 {
44318 int pos = 0;
44319 int i;
44320
44321 /* First allocate the local general purpose registers. */
44322 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44323 if (GENERAL_REGNO_P (i) && call_used_regs[i])
44324 reg_alloc_order [pos++] = i;
44325
44326 /* Global general purpose registers. */
44327 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44328 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
44329 reg_alloc_order [pos++] = i;
44330
44331 /* x87 registers come first in case we are doing FP math
44332 using them. */
44333 if (!TARGET_SSE_MATH)
44334 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44335 reg_alloc_order [pos++] = i;
44336
44337 /* SSE registers. */
44338 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
44339 reg_alloc_order [pos++] = i;
44340 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
44341 reg_alloc_order [pos++] = i;
44342
44343 /* Extended REX SSE registers. */
44344 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
44345 reg_alloc_order [pos++] = i;
44346
44347 /* Mask registers. */
44348 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
44349 reg_alloc_order [pos++] = i;
44350
44351 /* MPX bound registers. */
44352 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
44353 reg_alloc_order [pos++] = i;
44354
44355 /* x87 registers. */
44356 if (TARGET_SSE_MATH)
44357 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44358 reg_alloc_order [pos++] = i;
44359
44360 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
44361 reg_alloc_order [pos++] = i;
44362
44363 /* Initialize the rest of the array, as we do not allocate some registers
44364 at all. */
44365 while (pos < FIRST_PSEUDO_REGISTER)
44366 reg_alloc_order [pos++] = 0;
44367 }
44368
44369 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
44370 in struct attribute_spec.handler. */
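/* Typical use (illustrative):
 struct S f (void) __attribute__ ((callee_pop_aggregate_return (1)));
 where the 0/1 argument selects whether the callee pops the hidden
 pointer to the returned aggregate. */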
44371 static tree
44372 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
44373 tree args,
44374 int,
44375 bool *no_add_attrs)
44376 {
44377 if (TREE_CODE (*node) != FUNCTION_TYPE
44378 && TREE_CODE (*node) != METHOD_TYPE
44379 && TREE_CODE (*node) != FIELD_DECL
44380 && TREE_CODE (*node) != TYPE_DECL)
44381 {
44382 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44383 name);
44384 *no_add_attrs = true;
44385 return NULL_TREE;
44386 }
44387 if (TARGET_64BIT)
44388 {
44389 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
44390 name);
44391 *no_add_attrs = true;
44392 return NULL_TREE;
44393 }
44394 if (is_attribute_p ("callee_pop_aggregate_return", name))
44395 {
44396 tree cst;
44397
44398 cst = TREE_VALUE (args);
44399 if (TREE_CODE (cst) != INTEGER_CST)
44400 {
44401 warning (OPT_Wattributes,
44402 "%qE attribute requires an integer constant argument",
44403 name);
44404 *no_add_attrs = true;
44405 }
44406 else if (compare_tree_int (cst, 0) != 0
44407 && compare_tree_int (cst, 1) != 0)
44408 {
44409 warning (OPT_Wattributes,
44410 "argument to %qE attribute is neither zero, nor one",
44411 name);
44412 *no_add_attrs = true;
44413 }
44414
44415 return NULL_TREE;
44416 }
44417
44418 return NULL_TREE;
44419 }
44420
44421 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
44422 struct attribute_spec.handler. */
44423 static tree
44424 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
44425 bool *no_add_attrs)
44426 {
44427 if (TREE_CODE (*node) != FUNCTION_TYPE
44428 && TREE_CODE (*node) != METHOD_TYPE
44429 && TREE_CODE (*node) != FIELD_DECL
44430 && TREE_CODE (*node) != TYPE_DECL)
44431 {
44432 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44433 name);
44434 *no_add_attrs = true;
44435 return NULL_TREE;
44436 }
44437
44438 /* Can combine regparm with all attributes but fastcall. */
44439 if (is_attribute_p ("ms_abi", name))
44440 {
44441 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
44442 {
44443 error ("ms_abi and sysv_abi attributes are not compatible");
44444 }
44445
44446 return NULL_TREE;
44447 }
44448 else if (is_attribute_p ("sysv_abi", name))
44449 {
44450 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
44451 {
44452 error ("ms_abi and sysv_abi attributes are not compatible");
44453 }
44454
44455 return NULL_TREE;
44456 }
44457
44458 return NULL_TREE;
44459 }
44460
44461 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
44462 struct attribute_spec.handler. */
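/* Typical use (illustrative):
 struct __attribute__ ((ms_struct)) S { char c; double d; };
 which selects the MS layout rules for S. */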
44463 static tree
44464 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
44465 bool *no_add_attrs)
44466 {
44467 tree *type = NULL;
44468 if (DECL_P (*node))
44469 {
44470 if (TREE_CODE (*node) == TYPE_DECL)
44471 type = &TREE_TYPE (*node);
44472 }
44473 else
44474 type = node;
44475
44476 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
44477 {
44478 warning (OPT_Wattributes, "%qE attribute ignored",
44479 name);
44480 *no_add_attrs = true;
44481 }
44482
44483 else if ((is_attribute_p ("ms_struct", name)
44484 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
44485 || ((is_attribute_p ("gcc_struct", name)
44486 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
44487 {
44488 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
44489 name);
44490 *no_add_attrs = true;
44491 }
44492
44493 return NULL_TREE;
44494 }
44495
44496 static tree
44497 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
44498 bool *no_add_attrs)
44499 {
44500 if (TREE_CODE (*node) != FUNCTION_DECL)
44501 {
44502 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44503 name);
44504 *no_add_attrs = true;
44505 }
44506 return NULL_TREE;
44507 }
44508
44509 static bool
44510 ix86_ms_bitfield_layout_p (const_tree record_type)
44511 {
44512 return ((TARGET_MS_BITFIELD_LAYOUT
44513 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
44514 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
44515 }
44516
44517 /* Returns an expression indicating where the this parameter is
44518 located on entry to the FUNCTION. */
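/* E.g. for the 64-bit SysV ABI this is %rdi, or %rsi when a hidden
 aggregate-return pointer already occupies %rdi (illustrative). */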
44519
44520 static rtx
44521 x86_this_parameter (tree function)
44522 {
44523 tree type = TREE_TYPE (function);
44524 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
44525 int nregs;
44526
44527 if (TARGET_64BIT)
44528 {
44529 const int *parm_regs;
44530
44531 if (ix86_function_type_abi (type) == MS_ABI)
44532 parm_regs = x86_64_ms_abi_int_parameter_registers;
44533 else
44534 parm_regs = x86_64_int_parameter_registers;
44535 return gen_rtx_REG (Pmode, parm_regs[aggr]);
44536 }
44537
44538 nregs = ix86_function_regparm (type, function);
44539
44540 if (nregs > 0 && !stdarg_p (type))
44541 {
44542 int regno;
44543 unsigned int ccvt = ix86_get_callcvt (type);
44544
44545 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44546 regno = aggr ? DX_REG : CX_REG;
44547 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44548 {
44549 regno = CX_REG;
44550 if (aggr)
44551 return gen_rtx_MEM (SImode,
44552 plus_constant (Pmode, stack_pointer_rtx, 4));
44553 }
44554 else
44555 {
44556 regno = AX_REG;
44557 if (aggr)
44558 {
44559 regno = DX_REG;
44560 if (nregs == 1)
44561 return gen_rtx_MEM (SImode,
44562 plus_constant (Pmode,
44563 stack_pointer_rtx, 4));
44564 }
44565 }
44566 return gen_rtx_REG (SImode, regno);
44567 }
44568
44569 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
44570 aggr ? 8 : 4));
44571 }
44572
44573 /* Determine whether x86_output_mi_thunk can succeed. */
44574
44575 static bool
44576 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
44577 const_tree function)
44578 {
44579 /* 64-bit can handle anything. */
44580 if (TARGET_64BIT)
44581 return true;
44582
44583 /* For 32-bit, everything's fine if we have one free register. */
44584 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
44585 return true;
44586
44587 /* Need a free register for vcall_offset. */
44588 if (vcall_offset)
44589 return false;
44590
44591 /* Need a free register for GOT references. */
44592 if (flag_pic && !targetm.binds_local_p (function))
44593 return false;
44594
44595 /* Otherwise ok. */
44596 return true;
44597 }
44598
44599 /* Output the assembler code for a thunk function. THUNK_DECL is the
44600 declaration for the thunk function itself, FUNCTION is the decl for
44601 the target function. DELTA is an immediate constant offset to be
44602 added to THIS. If VCALL_OFFSET is nonzero, the word at
44603 *(*this + vcall_offset) should be added to THIS. */
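/* For a simple 32-bit case with THIS passed on the stack and no
 VCALL_OFFSET, the emitted code is roughly (illustrative):
 addl $DELTA, 4(%esp)
 jmp FUNCTION */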
44604
44605 static void
44606 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
44607 HOST_WIDE_INT vcall_offset, tree function)
44608 {
44609 rtx this_param = x86_this_parameter (function);
44610 rtx this_reg, tmp, fnaddr;
44611 unsigned int tmp_regno;
44612 rtx_insn *insn;
44613
44614 if (TARGET_64BIT)
44615 tmp_regno = R10_REG;
44616 else
44617 {
44618 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
44619 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44620 tmp_regno = AX_REG;
44621 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44622 tmp_regno = DX_REG;
44623 else
44624 tmp_regno = CX_REG;
44625 }
44626
44627 emit_note (NOTE_INSN_PROLOGUE_END);
44628
44629 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
44630 pull it in now and let DELTA benefit. */
44631 if (REG_P (this_param))
44632 this_reg = this_param;
44633 else if (vcall_offset)
44634 {
44635 /* Put the this parameter into %eax. */
44636 this_reg = gen_rtx_REG (Pmode, AX_REG);
44637 emit_move_insn (this_reg, this_param);
44638 }
44639 else
44640 this_reg = NULL_RTX;
44641
44642 /* Adjust the this parameter by a fixed constant. */
44643 if (delta)
44644 {
44645 rtx delta_rtx = GEN_INT (delta);
44646 rtx delta_dst = this_reg ? this_reg : this_param;
44647
44648 if (TARGET_64BIT)
44649 {
44650 if (!x86_64_general_operand (delta_rtx, Pmode))
44651 {
44652 tmp = gen_rtx_REG (Pmode, tmp_regno);
44653 emit_move_insn (tmp, delta_rtx);
44654 delta_rtx = tmp;
44655 }
44656 }
44657
44658 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
44659 }
44660
44661 /* Adjust the this parameter by a value stored in the vtable. */
44662 if (vcall_offset)
44663 {
44664 rtx vcall_addr, vcall_mem, this_mem;
44665
44666 tmp = gen_rtx_REG (Pmode, tmp_regno);
44667
44668 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
44669 if (Pmode != ptr_mode)
44670 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
44671 emit_move_insn (tmp, this_mem);
44672
44673 /* Adjust the this parameter. */
44674 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
44675 if (TARGET_64BIT
44676 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
44677 {
44678 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
44679 emit_move_insn (tmp2, GEN_INT (vcall_offset));
44680 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
44681 }
44682
44683 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
44684 if (Pmode != ptr_mode)
44685 emit_insn (gen_addsi_1_zext (this_reg,
44686 gen_rtx_REG (ptr_mode,
44687 REGNO (this_reg)),
44688 vcall_mem));
44689 else
44690 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
44691 }
44692
44693 /* If necessary, drop THIS back to its stack slot. */
44694 if (this_reg && this_reg != this_param)
44695 emit_move_insn (this_param, this_reg);
44696
44697 fnaddr = XEXP (DECL_RTL (function), 0);
44698 if (TARGET_64BIT)
44699 {
44700 if (!flag_pic || targetm.binds_local_p (function)
44701 || TARGET_PECOFF)
44702 ;
44703 else
44704 {
44705 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
44706 tmp = gen_rtx_CONST (Pmode, tmp);
44707 fnaddr = gen_const_mem (Pmode, tmp);
44708 }
44709 }
44710 else
44711 {
44712 if (!flag_pic || targetm.binds_local_p (function))
44713 ;
44714 #if TARGET_MACHO
44715 else if (TARGET_MACHO)
44716 {
44717 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
44718 fnaddr = XEXP (fnaddr, 0);
44719 }
44720 #endif /* TARGET_MACHO */
44721 else
44722 {
44723 tmp = gen_rtx_REG (Pmode, CX_REG);
44724 output_set_got (tmp, NULL_RTX);
44725
44726 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
44727 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
44728 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
44729 fnaddr = gen_const_mem (Pmode, fnaddr);
44730 }
44731 }
44732
44733 /* Our sibling call patterns do not allow memories, because we have no
44734 predicate that can distinguish between frame and non-frame memory.
44735 For our purposes here, we can get away with (ab)using a jump pattern,
44736 because we're going to do no optimization. */
44737 if (MEM_P (fnaddr))
44738 {
44739 if (sibcall_insn_operand (fnaddr, word_mode))
44740 {
44741 fnaddr = XEXP (DECL_RTL (function), 0);
44742 tmp = gen_rtx_MEM (QImode, fnaddr);
44743 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44744 tmp = emit_call_insn (tmp);
44745 SIBLING_CALL_P (tmp) = 1;
44746 }
44747 else
44748 emit_jump_insn (gen_indirect_jump (fnaddr));
44749 }
44750 else
44751 {
44752 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
44753 {
44754 // CM_LARGE_PIC always uses a pseudo PIC register, which is
44755 // uninitialized. Since FUNCTION is local and calling it
44756 // doesn't go through PLT, we use scratch register %r11 as
44757 // PIC register and initialize it here.
44758 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
44759 ix86_init_large_pic_reg (tmp_regno);
44760 fnaddr = legitimize_pic_address (fnaddr,
44761 gen_rtx_REG (Pmode, tmp_regno));
44762 }
44763
44764 if (!sibcall_insn_operand (fnaddr, word_mode))
44765 {
44766 tmp = gen_rtx_REG (word_mode, tmp_regno);
44767 if (GET_MODE (fnaddr) != word_mode)
44768 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
44769 emit_move_insn (tmp, fnaddr);
44770 fnaddr = tmp;
44771 }
44772
44773 tmp = gen_rtx_MEM (QImode, fnaddr);
44774 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44775 tmp = emit_call_insn (tmp);
44776 SIBLING_CALL_P (tmp) = 1;
44777 }
44778 emit_barrier ();
44779
44780 /* Emit just enough of rest_of_compilation to get the insns emitted.
44781 Note that use_thunk calls assemble_start_function et al. */
44782 insn = get_insns ();
44783 shorten_branches (insn);
44784 final_start_function (insn, file, 1);
44785 final (insn, file, 1);
44786 final_end_function ();
44787 }
44788
44789 static void
44790 x86_file_start (void)
44791 {
44792 default_file_start ();
44793 if (TARGET_16BIT)
44794 fputs ("\t.code16gcc\n", asm_out_file);
44795 #if TARGET_MACHO
44796 darwin_file_start ();
44797 #endif
44798 if (X86_FILE_START_VERSION_DIRECTIVE)
44799 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
44800 if (X86_FILE_START_FLTUSED)
44801 fputs ("\t.global\t__fltused\n", asm_out_file);
44802 if (ix86_asm_dialect == ASM_INTEL)
44803 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
44804 }
44805
44806 int
44807 x86_field_alignment (tree field, int computed)
44808 {
44809 machine_mode mode;
44810 tree type = TREE_TYPE (field);
44811
44812 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
44813 return computed;
44814 if (TARGET_IAMCU)
44815 return iamcu_alignment (type, computed);
44816 mode = TYPE_MODE (strip_array_types (type));
44817 if (mode == DFmode || mode == DCmode
44818 || GET_MODE_CLASS (mode) == MODE_INT
44819 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
44820 return MIN (32, computed);
44821 return computed;
44822 }
44823
44824 /* Print call to TARGET to FILE. */
44825
44826 static void
44827 x86_print_call_or_nop (FILE *file, const char *target)
44828 {
44829 if (flag_nop_mcount)
44830 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
44831 else
44832 fprintf (file, "1:\tcall\t%s\n", target);
44833 }
44834
44835 /* Output assembler code to FILE to increment profiler label # LABELNO
44836 for profiling a function entry. */
44837 void
44838 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
44839 {
44840 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
44841 : MCOUNT_NAME);
44842 if (TARGET_64BIT)
44843 {
44844 #ifndef NO_PROFILE_COUNTERS
44845 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
44846 #endif
44847
44848 if (!TARGET_PECOFF && flag_pic)
44849 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
44850 else
44851 x86_print_call_or_nop (file, mcount_name);
44852 }
44853 else if (flag_pic)
44854 {
44855 #ifndef NO_PROFILE_COUNTERS
44856 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
44857 LPREFIX, labelno);
44858 #endif
44859 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
44860 }
44861 else
44862 {
44863 #ifndef NO_PROFILE_COUNTERS
44864 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
44865 LPREFIX, labelno);
44866 #endif
44867 x86_print_call_or_nop (file, mcount_name);
44868 }
44869
44870 if (flag_record_mcount)
44871 {
44872 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
44873 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
44874 fprintf (file, "\t.previous\n");
44875 }
44876 }
44877
44878 /* We don't have exact information about the insn sizes, but we may assume
44879 quite safely that we are informed about all 1 byte insns and memory
44880 address sizes. This is enough to eliminate unnecessary padding in
44881 99% of cases. */
44882
44883 static int
44884 min_insn_size (rtx_insn *insn)
44885 {
44886 int l = 0, len;
44887
44888 if (!INSN_P (insn) || !active_insn_p (insn))
44889 return 0;
44890
44891 /* Discard alignments we've emitted, and jump instructions. */
44892 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
44893 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
44894 return 0;
44895
44896 /* Important case - calls are always 5 bytes.
44897 It is common to have many calls in a row. */
44898 if (CALL_P (insn)
44899 && symbolic_reference_mentioned_p (PATTERN (insn))
44900 && !SIBLING_CALL_P (insn))
44901 return 5;
44902 len = get_attr_length (insn);
44903 if (len <= 1)
44904 return 1;
44905
44906 /* For normal instructions we rely on get_attr_length being exact,
44907 with a few exceptions. */
44908 if (!JUMP_P (insn))
44909 {
44910 enum attr_type type = get_attr_type (insn);
44911
44912 switch (type)
44913 {
44914 case TYPE_MULTI:
44915 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
44916 || asm_noperands (PATTERN (insn)) >= 0)
44917 return 0;
44918 break;
44919 case TYPE_OTHER:
44920 case TYPE_FCMP:
44921 break;
44922 default:
44923 /* Otherwise trust get_attr_length. */
44924 return len;
44925 }
44926
44927 l = get_attr_length_address (insn);
44928 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
44929 l = 4;
44930 }
44931 if (l)
44932 return 1+l;
44933 else
44934 return 2;
44935 }
44936
44937 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
44938
44939 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
44940 window. */
44941
44942 static void
44943 ix86_avoid_jump_mispredicts (void)
44944 {
44945 rtx_insn *insn, *start = get_insns ();
44946 int nbytes = 0, njumps = 0;
44947 bool isjump = false;
44948
44949 /* Look for all minimal intervals of instructions containing 4 jumps.
44950 The intervals are bounded by START and INSN. NBYTES is the total
44951 size of instructions in the interval including INSN and not including
44952 START. When NBYTES is smaller than 16 bytes, it is possible
44953 that the end of START and INSN ends up in the same 16-byte page.
44954
44955 The smallest offset in the page INSN can start is the case where START
44956 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
44957 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
44958
44959 Don't consider asm goto as a jump; while it can contain a jump, it doesn't
44960 have to, since control transfer to its label(s) can be performed through other
44961 means, and we also estimate the minimum length of all asm stmts as 0. */
44962 for (insn = start; insn; insn = NEXT_INSN (insn))
44963 {
44964 int min_size;
44965
44966 if (LABEL_P (insn))
44967 {
44968 int align = label_to_alignment (insn);
44969 int max_skip = label_to_max_skip (insn);
44970
44971 if (max_skip > 15)
44972 max_skip = 15;
44973 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
44974 already in the current 16 byte page, because otherwise
44975 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
44976 bytes to reach 16 byte boundary. */
44977 if (align <= 0
44978 || (align <= 3 && max_skip != (1 << align) - 1))
44979 max_skip = 0;
44980 if (dump_file)
44981 fprintf (dump_file, "Label %i with max_skip %i\n",
44982 INSN_UID (insn), max_skip);
44983 if (max_skip)
44984 {
44985 while (nbytes + max_skip >= 16)
44986 {
44987 start = NEXT_INSN (start);
44988 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
44989 || CALL_P (start))
44990 njumps--, isjump = true;
44991 else
44992 isjump = false;
44993 nbytes -= min_insn_size (start);
44994 }
44995 }
44996 continue;
44997 }
44998
44999 min_size = min_insn_size (insn);
45000 nbytes += min_size;
45001 if (dump_file)
45002 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
45003 INSN_UID (insn), min_size);
45004 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
45005 || CALL_P (insn))
45006 njumps++;
45007 else
45008 continue;
45009
45010 while (njumps > 3)
45011 {
45012 start = NEXT_INSN (start);
45013 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45014 || CALL_P (start))
45015 njumps--, isjump = true;
45016 else
45017 isjump = false;
45018 nbytes -= min_insn_size (start);
45019 }
45020 gcc_assert (njumps >= 0);
45021 if (dump_file)
45022 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
45023 INSN_UID (start), INSN_UID (insn), nbytes);
45024
45025 if (njumps == 3 && isjump && nbytes < 16)
45026 {
45027 int padsize = 15 - nbytes + min_insn_size (insn);
45028
45029 if (dump_file)
45030 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
45031 INSN_UID (insn), padsize);
45032 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
45033 }
45034 }
45035 }
45036 #endif
45037
45038 /* AMD Athlon works faster
45039 when RET is not the destination of a conditional jump nor directly preceded
45040 by another jump instruction. We avoid the penalty by inserting a NOP just
45041 before the RET instruction in such cases. */
45042 static void
45043 ix86_pad_returns (void)
45044 {
45045 edge e;
45046 edge_iterator ei;
45047
45048 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45049 {
45050 basic_block bb = e->src;
45051 rtx_insn *ret = BB_END (bb);
45052 rtx_insn *prev;
45053 bool replace = false;
45054
45055 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
45056 || optimize_bb_for_size_p (bb))
45057 continue;
45058 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
45059 if (active_insn_p (prev) || LABEL_P (prev))
45060 break;
45061 if (prev && LABEL_P (prev))
45062 {
45063 edge e;
45064 edge_iterator ei;
45065
45066 FOR_EACH_EDGE (e, ei, bb->preds)
45067 if (EDGE_FREQUENCY (e) && e->src->index >= 0
45068 && !(e->flags & EDGE_FALLTHRU))
45069 {
45070 replace = true;
45071 break;
45072 }
45073 }
45074 if (!replace)
45075 {
45076 prev = prev_active_insn (ret);
45077 if (prev
45078 && ((JUMP_P (prev) && any_condjump_p (prev))
45079 || CALL_P (prev)))
45080 replace = true;
45081 /* Empty functions get a branch mispredict even when
45082 the jump destination is not visible to us. */
45083 if (!prev && !optimize_function_for_size_p (cfun))
45084 replace = true;
45085 }
45086 if (replace)
45087 {
45088 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
45089 delete_insn (ret);
45090 }
45091 }
45092 }
45093
45094 /* Count the minimum number of instructions in BB. Return 4 if the
45095 number of instructions >= 4. */
45096
45097 static int
45098 ix86_count_insn_bb (basic_block bb)
45099 {
45100 rtx_insn *insn;
45101 int insn_count = 0;
45102
45103 /* Count number of instructions in this block. Return 4 if the number
45104 of instructions >= 4. */
45105 FOR_BB_INSNS (bb, insn)
45106 {
45107 /* This only happens in exit blocks. */
45108 if (JUMP_P (insn)
45109 && ANY_RETURN_P (PATTERN (insn)))
45110 break;
45111
45112 if (NONDEBUG_INSN_P (insn)
45113 && GET_CODE (PATTERN (insn)) != USE
45114 && GET_CODE (PATTERN (insn)) != CLOBBER)
45115 {
45116 insn_count++;
45117 if (insn_count >= 4)
45118 return insn_count;
45119 }
45120 }
45121
45122 return insn_count;
45123 }
45124
45125
45126 /* Count the minimum number of instructions in code path in BB.
45127 Return 4 if the number of instructions >= 4. */
45128
45129 static int
45130 ix86_count_insn (basic_block bb)
45131 {
45132 edge e;
45133 edge_iterator ei;
45134 int min_prev_count;
45135
45136 /* Only bother counting instructions along paths with no
45137 more than 2 basic blocks between entry and exit. Given
45138 that BB has an edge to exit, determine if a predecessor
45139 of BB has an edge from entry. If so, compute the number
45140 of instructions in the predecessor block. If there
45141 happen to be multiple such blocks, compute the minimum. */
45142 min_prev_count = 4;
45143 FOR_EACH_EDGE (e, ei, bb->preds)
45144 {
45145 edge prev_e;
45146 edge_iterator prev_ei;
45147
45148 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45149 {
45150 min_prev_count = 0;
45151 break;
45152 }
45153 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
45154 {
45155 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45156 {
45157 int count = ix86_count_insn_bb (e->src);
45158 if (count < min_prev_count)
45159 min_prev_count = count;
45160 break;
45161 }
45162 }
45163 }
45164
45165 if (min_prev_count < 4)
45166 min_prev_count += ix86_count_insn_bb (bb);
45167
45168 return min_prev_count;
45169 }
45170
45171 /* Pad short function to 4 instructions. */
45172
45173 static void
45174 ix86_pad_short_function (void)
45175 {
45176 edge e;
45177 edge_iterator ei;
45178
45179 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45180 {
45181 rtx_insn *ret = BB_END (e->src);
45182 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
45183 {
45184 int insn_count = ix86_count_insn (e->src);
45185
45186 /* Pad short function. */
45187 if (insn_count < 4)
45188 {
45189 rtx_insn *insn = ret;
45190
45191 /* Find epilogue. */
45192 while (insn
45193 && (!NOTE_P (insn)
45194 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
45195 insn = PREV_INSN (insn);
45196
45197 if (!insn)
45198 insn = ret;
45199
45200 /* Two NOPs count as one instruction. */
45201 insn_count = 2 * (4 - insn_count);
45202 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
45203 }
45204 }
45205 }
45206 }
45207
45208 /* Fix up a Windows system unwinder issue. If an EH region falls through into
45209 the epilogue, the Windows system unwinder will apply epilogue logic and
45210 produce incorrect offsets. This can be avoided by adding a nop between
45211 the last insn that can throw and the first insn of the epilogue. */
45212
45213 static void
45214 ix86_seh_fixup_eh_fallthru (void)
45215 {
45216 edge e;
45217 edge_iterator ei;
45218
45219 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45220 {
45221 rtx_insn *insn, *next;
45222
45223 /* Find the beginning of the epilogue. */
45224 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
45225 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
45226 break;
45227 if (insn == NULL)
45228 continue;
45229
45230 /* We only care about preceding insns that can throw. */
45231 insn = prev_active_insn (insn);
45232 if (insn == NULL || !can_throw_internal (insn))
45233 continue;
45234
45235 /* Do not separate calls from their debug information. */
45236 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
45237 if (NOTE_P (next)
45238 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
45239 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
45240 insn = next;
45241 else
45242 break;
45243
45244 emit_insn_after (gen_nops (const1_rtx), insn);
45245 }
45246 }
45247
45248 /* Given a register number BASE, the lowest of a group of registers, update
45249 regsets IN and OUT with the registers that should be avoided in input
45250 and output operands respectively when trying to avoid generating a modr/m
45251 byte for -fmitigate-rop. */
45252
45253 static void
45254 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
45255 {
45256 SET_HARD_REG_BIT (out, base);
45257 SET_HARD_REG_BIT (out, base + 1);
45258 SET_HARD_REG_BIT (in, base + 2);
45259 SET_HARD_REG_BIT (in, base + 3);
45260 }
45261
45262 /* Called if -fmitigate-rop is in effect. Try to rewrite instructions so
45263 that certain encodings of modr/m bytes do not occur. */
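/* (Illustrative rationale: some modr/m byte values are themselves useful
 opcode bytes, e.g. 0xc2/0xc3 encode "ret", so keeping them out of the
 instruction encoding removes potential ROP gadgets; the exact set is
 decided by ix86_rop_should_change_byte_p.) */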
45264 static void
45265 ix86_mitigate_rop (void)
45266 {
45267 HARD_REG_SET input_risky;
45268 HARD_REG_SET output_risky;
45269 HARD_REG_SET inout_risky;
45270
45271 CLEAR_HARD_REG_SET (output_risky);
45272 CLEAR_HARD_REG_SET (input_risky);
45273 SET_HARD_REG_BIT (output_risky, AX_REG);
45274 SET_HARD_REG_BIT (output_risky, CX_REG);
45275 SET_HARD_REG_BIT (input_risky, BX_REG);
45276 SET_HARD_REG_BIT (input_risky, DX_REG);
45277 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
45278 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
45279 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
45280 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
45281 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
45282 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
45283 COPY_HARD_REG_SET (inout_risky, input_risky);
45284 IOR_HARD_REG_SET (inout_risky, output_risky);
45285
45286 df_note_add_problem ();
45287 /* Fix up what stack-regs did. */
45288 df_insn_rescan_all ();
45289 df_analyze ();
45290
45291 regrename_init (true);
45292 regrename_analyze (NULL);
45293
45294 auto_vec<du_head_p> cands;
45295
45296 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
45297 {
45298 if (!NONDEBUG_INSN_P (insn))
45299 continue;
45300
45301 if (GET_CODE (PATTERN (insn)) == USE
45302 || GET_CODE (PATTERN (insn)) == CLOBBER)
45303 continue;
45304
45305 extract_insn (insn);
45306
45307 int opno0, opno1;
45308 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45309 recog_data.n_operands, &opno0,
45310 &opno1);
45311
45312 if (!ix86_rop_should_change_byte_p (modrm))
45313 continue;
45314
45315 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
45316
45317 /* This happens when regrename has to fail a block. */
45318 if (!info->op_info)
45319 continue;
45320
45321 if (info->op_info[opno0].n_chains != 0)
45322 {
45323 gcc_assert (info->op_info[opno0].n_chains == 1);
45324 du_head_p op0c;
45325 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
45326 if (op0c->target_data_1 + op0c->target_data_2 == 0
45327 && !op0c->cannot_rename)
45328 cands.safe_push (op0c);
45329
45330 op0c->target_data_1++;
45331 }
45332 if (info->op_info[opno1].n_chains != 0)
45333 {
45334 gcc_assert (info->op_info[opno1].n_chains == 1);
45335 du_head_p op1c;
45336 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
45337 if (op1c->target_data_1 + op1c->target_data_2 == 0
45338 && !op1c->cannot_rename)
45339 cands.safe_push (op1c);
45340
45341 op1c->target_data_2++;
45342 }
45343 }
45344
45345 int i;
45346 du_head_p head;
45347 FOR_EACH_VEC_ELT (cands, i, head)
45348 {
45349 int old_reg, best_reg;
45350 HARD_REG_SET unavailable;
45351
45352 CLEAR_HARD_REG_SET (unavailable);
45353 if (head->target_data_1)
45354 IOR_HARD_REG_SET (unavailable, output_risky);
45355 if (head->target_data_2)
45356 IOR_HARD_REG_SET (unavailable, input_risky);
45357
45358 int n_uses;
45359 reg_class superclass = regrename_find_superclass (head, &n_uses,
45360 &unavailable);
45361 old_reg = head->regno;
45362 best_reg = find_rename_reg (head, superclass, &unavailable,
45363 old_reg, false);
45364 bool ok = regrename_do_replace (head, best_reg);
45365 gcc_assert (ok);
45366 if (dump_file)
45367 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
45368 reg_names[best_reg], reg_class_names[superclass]);
45369
45370 }
45371
45372 regrename_finish ();
45373
45374 df_analyze ();
45375
45376 basic_block bb;
45377 regset_head live;
45378
45379 INIT_REG_SET (&live);
45380
45381 FOR_EACH_BB_FN (bb, cfun)
45382 {
45383 rtx_insn *insn;
45384
45385 COPY_REG_SET (&live, DF_LR_OUT (bb));
45386 df_simulate_initialize_backwards (bb, &live);
45387
45388 FOR_BB_INSNS_REVERSE (bb, insn)
45389 {
45390 if (!NONDEBUG_INSN_P (insn))
45391 continue;
45392
45393 df_simulate_one_insn_backwards (bb, insn, &live);
45394
45395 if (GET_CODE (PATTERN (insn)) == USE
45396 || GET_CODE (PATTERN (insn)) == CLOBBER)
45397 continue;
45398
45399 extract_insn (insn);
45400 constrain_operands_cached (insn, reload_completed);
45401 int opno0, opno1;
45402 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45403 recog_data.n_operands, &opno0,
45404 &opno1);
45405 if (modrm < 0
45406 || !ix86_rop_should_change_byte_p (modrm)
45407 || opno0 == opno1)
45408 continue;
45409
45410 rtx oldreg = recog_data.operand[opno1];
45411 preprocess_constraints (insn);
45412 const operand_alternative *alt = which_op_alt ();
45413
45414 int i;
45415 for (i = 0; i < recog_data.n_operands; i++)
45416 if (i != opno1
45417 && alt[i].earlyclobber
45418 && reg_overlap_mentioned_p (recog_data.operand[i],
45419 oldreg))
45420 break;
45421
45422 if (i < recog_data.n_operands)
45423 continue;
45424
45425 if (dump_file)
45426 fprintf (dump_file,
45427 "attempting to fix modrm byte in insn %d:"
45428 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
45429 reg_class_names[alt[opno1].cl]);
45430
45431 HARD_REG_SET unavailable;
45432 REG_SET_TO_HARD_REG_SET (unavailable, &live);
45433 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
45434 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
45435 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
45436 IOR_HARD_REG_SET (unavailable, output_risky);
45437 IOR_COMPL_HARD_REG_SET (unavailable,
45438 reg_class_contents[alt[opno1].cl]);
45439
45440 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
45441 if (!TEST_HARD_REG_BIT (unavailable, i))
45442 break;
45443 if (i == FIRST_PSEUDO_REGISTER)
45444 {
45445 if (dump_file)
45446 fprintf (dump_file, ", none available\n");
45447 continue;
45448 }
45449 if (dump_file)
45450 fprintf (dump_file, " -> %d\n", i);
45451 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
45452 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
45453 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
45454 }
45455 }
45456 }
45457
45458 /* Implement machine specific optimizations. We implement padding of returns
45459 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
45460 static void
45461 ix86_reorg (void)
45462 {
45463 /* We are freeing block_for_insn in the toplev to keep compatibility
45464 with old MDEP_REORGS that are not CFG based. Recompute it now. */
45465 compute_bb_for_insn ();
45466
45467 if (flag_mitigate_rop)
45468 ix86_mitigate_rop ();
45469
45470 if (TARGET_SEH && current_function_has_exception_handlers ())
45471 ix86_seh_fixup_eh_fallthru ();
45472
45473 if (optimize && optimize_function_for_speed_p (cfun))
45474 {
45475 if (TARGET_PAD_SHORT_FUNCTION)
45476 ix86_pad_short_function ();
45477 else if (TARGET_PAD_RETURNS)
45478 ix86_pad_returns ();
45479 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
45480 if (TARGET_FOUR_JUMP_LIMIT)
45481 ix86_avoid_jump_mispredicts ();
45482 #endif
45483 }
45484 }
45485
45486 /* Return nonzero when a QImode register that must be represented via a REX
45487 prefix is used. */
45488 bool
45489 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
45490 {
45491 int i;
45492 extract_insn_cached (insn);
45493 for (i = 0; i < recog_data.n_operands; i++)
45494 if (GENERAL_REG_P (recog_data.operand[i])
45495 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
45496 return true;
45497 return false;
45498 }
45499
45500 /* Return true when INSN mentions a register that must be encoded using a REX
45501 prefix. */
45502 bool
45503 x86_extended_reg_mentioned_p (rtx insn)
45504 {
45505 subrtx_iterator::array_type array;
45506 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
45507 {
45508 const_rtx x = *iter;
45509 if (REG_P (x)
45510 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
45511 return true;
45512 }
45513 return false;
45514 }
45515
45516 /* If profitable, negate (without causing overflow) the integer constant
45517 of mode MODE at location LOC. Return true if the constant was negated. */
45518 bool
45519 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
45520 {
45521 HOST_WIDE_INT val;
45522
45523 if (!CONST_INT_P (*loc))
45524 return false;
45525
45526 switch (mode)
45527 {
45528 case DImode:
45529 /* DImode x86_64 constants must fit in 32 bits. */
45530 gcc_assert (x86_64_immediate_operand (*loc, mode));
45531
45532 mode = SImode;
45533 break;
45534
45535 case SImode:
45536 case HImode:
45537 case QImode:
45538 break;
45539
45540 default:
45541 gcc_unreachable ();
45542 }
45543
45544 /* Avoid overflows. */
45545 if (mode_signbit_p (mode, *loc))
45546 return false;
45547
45548 val = INTVAL (*loc);
45549
45550 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
45551 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
45552 if ((val < 0 && val != -128)
45553 || val == 128)
45554 {
45555 *loc = GEN_INT (-val);
45556 return true;
45557 }
45558
45559 return false;
45560 }
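
/* A minimal scalar sketch of the negation test above (illustration only;
   the sketch_* name is not part of GCC).  The immediate is negated so
   that, e.g., "addl $-4, %eax" can be emitted as "subl $4, %eax"; -128 is
   kept because it encodes in one byte while +128 does not, and +128 is
   negated for the same reason.  Assumes a plain 32-bit SImode operand.  */
static int
sketch_should_negate_imm (int val)
{
  if (val == (-2147483647 - 1))
    return 0;			/* sign bit of SImode: negation overflows */
  return (val < 0 && val != -128) || val == 128;
}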
45561
45562 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
45563 optabs would emit if we didn't have TFmode patterns. */
45564
45565 void
45566 x86_emit_floatuns (rtx operands[2])
45567 {
45568 rtx_code_label *neglab, *donelab;
45569 rtx i0, i1, f0, in, out;
45570 machine_mode mode, inmode;
45571
45572 inmode = GET_MODE (operands[1]);
45573 gcc_assert (inmode == SImode || inmode == DImode);
45574
45575 out = operands[0];
45576 in = force_reg (inmode, operands[1]);
45577 mode = GET_MODE (out);
45578 neglab = gen_label_rtx ();
45579 donelab = gen_label_rtx ();
45580 f0 = gen_reg_rtx (mode);
45581
45582 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
45583
45584 expand_float (out, in, 0);
45585
45586 emit_jump_insn (gen_jump (donelab));
45587 emit_barrier ();
45588
45589 emit_label (neglab);
45590
45591 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
45592 1, OPTAB_DIRECT);
45593 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
45594 1, OPTAB_DIRECT);
45595 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
45596
45597 expand_float (f0, i0, 0);
45598
45599 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
45600
45601 emit_label (donelab);
45602 }
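
/* The RTL emitted above corresponds roughly to the following scalar
   algorithm, sketched here (illustration only; the sketch_* name is not
   part of GCC) for a 64-bit unsigned input converted to double when only
   signed conversions are available.  */
static double
sketch_floatuns (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;	/* fast path: sign bit clear */

  /* Halve the value, keeping the low bit so the final rounding is
     unchanged, convert as signed, then double the result.  */
  unsigned long long half = (u >> 1) | (u & 1);
  double f = (double) (long long) half;
  return f + f;
}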
45603 \f
45604 static bool canonicalize_perm (struct expand_vec_perm_d *d);
45605 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
45606 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
45607 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
45608
45609 /* Get a vector mode of the same size as the original but with elements
45610 twice as wide. This is only guaranteed to apply to integral vectors. */
45611
45612 static inline machine_mode
45613 get_mode_wider_vector (machine_mode o)
45614 {
45615 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
45616 machine_mode n = GET_MODE_WIDER_MODE (o);
45617 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
45618 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
45619 return n;
45620 }
45621
45622 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
45623 fill target with val via vec_duplicate. */
45624
45625 static bool
45626 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
45627 {
45628 bool ok;
45629 rtx_insn *insn;
45630 rtx dup;
45631
45632 /* First attempt to recognize VAL as-is. */
45633 dup = gen_rtx_VEC_DUPLICATE (mode, val);
45634 insn = emit_insn (gen_rtx_SET (target, dup));
45635 if (recog_memoized (insn) < 0)
45636 {
45637 rtx_insn *seq;
45638 /* If that fails, force VAL into a register. */
45639
45640 start_sequence ();
45641 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
45642 seq = get_insns ();
45643 end_sequence ();
45644 if (seq)
45645 emit_insn_before (seq, insn);
45646
45647 ok = recog_memoized (insn) >= 0;
45648 gcc_assert (ok);
45649 }
45650 return true;
45651 }
45652
45653 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45654 with all elements equal to VAR. Return true if successful. */
45655
45656 static bool
45657 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
45658 rtx target, rtx val)
45659 {
45660 bool ok;
45661
45662 switch (mode)
45663 {
45664 case V2SImode:
45665 case V2SFmode:
45666 if (!mmx_ok)
45667 return false;
45668 /* FALLTHRU */
45669
45670 case V4DFmode:
45671 case V4DImode:
45672 case V8SFmode:
45673 case V8SImode:
45674 case V2DFmode:
45675 case V2DImode:
45676 case V4SFmode:
45677 case V4SImode:
45678 case V16SImode:
45679 case V8DImode:
45680 case V16SFmode:
45681 case V8DFmode:
45682 return ix86_vector_duplicate_value (mode, target, val);
45683
45684 case V4HImode:
45685 if (!mmx_ok)
45686 return false;
45687 if (TARGET_SSE || TARGET_3DNOW_A)
45688 {
45689 rtx x;
45690
45691 val = gen_lowpart (SImode, val);
45692 x = gen_rtx_TRUNCATE (HImode, val);
45693 x = gen_rtx_VEC_DUPLICATE (mode, x);
45694 emit_insn (gen_rtx_SET (target, x));
45695 return true;
45696 }
45697 goto widen;
45698
45699 case V8QImode:
45700 if (!mmx_ok)
45701 return false;
45702 goto widen;
45703
45704 case V8HImode:
45705 if (TARGET_AVX2)
45706 return ix86_vector_duplicate_value (mode, target, val);
45707
45708 if (TARGET_SSE2)
45709 {
45710 struct expand_vec_perm_d dperm;
45711 rtx tmp1, tmp2;
45712
45713 permute:
45714 memset (&dperm, 0, sizeof (dperm));
45715 dperm.target = target;
45716 dperm.vmode = mode;
45717 dperm.nelt = GET_MODE_NUNITS (mode);
45718 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
45719 dperm.one_operand_p = true;
45720
45721 /* Extend to SImode using a paradoxical SUBREG. */
45722 tmp1 = gen_reg_rtx (SImode);
45723 emit_move_insn (tmp1, gen_lowpart (SImode, val));
45724
45725 /* Insert the SImode value as low element of a V4SImode vector. */
45726 tmp2 = gen_reg_rtx (V4SImode);
45727 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
45728 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
45729
45730 ok = (expand_vec_perm_1 (&dperm)
45731 || expand_vec_perm_broadcast_1 (&dperm));
45732 gcc_assert (ok);
45733 return ok;
45734 }
45735 goto widen;
45736
45737 case V16QImode:
45738 if (TARGET_AVX2)
45739 return ix86_vector_duplicate_value (mode, target, val);
45740
45741 if (TARGET_SSE2)
45742 goto permute;
45743 goto widen;
45744
45745 widen:
45746 /* Replicate the value once into the next wider mode and recurse. */
45747 {
45748 machine_mode smode, wsmode, wvmode;
45749 rtx x;
45750
45751 smode = GET_MODE_INNER (mode);
45752 wvmode = get_mode_wider_vector (mode);
45753 wsmode = GET_MODE_INNER (wvmode);
45754
45755 val = convert_modes (wsmode, smode, val, true);
45756 x = expand_simple_binop (wsmode, ASHIFT, val,
45757 GEN_INT (GET_MODE_BITSIZE (smode)),
45758 NULL_RTX, 1, OPTAB_LIB_WIDEN);
45759 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
45760
45761 x = gen_reg_rtx (wvmode);
45762 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
45763 gcc_assert (ok);
45764 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
45765 return ok;
45766 }
45767
45768 case V16HImode:
45769 case V32QImode:
45770 if (TARGET_AVX2)
45771 return ix86_vector_duplicate_value (mode, target, val);
45772 else
45773 {
45774 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
45775 rtx x = gen_reg_rtx (hvmode);
45776
45777 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45778 gcc_assert (ok);
45779
45780 x = gen_rtx_VEC_CONCAT (mode, x, x);
45781 emit_insn (gen_rtx_SET (target, x));
45782 }
45783 return true;
45784
45785 case V64QImode:
45786 case V32HImode:
45787 if (TARGET_AVX512BW)
45788 return ix86_vector_duplicate_value (mode, target, val);
45789 else
45790 {
45791 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
45792 rtx x = gen_reg_rtx (hvmode);
45793
45794 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45795 gcc_assert (ok);
45796
45797 x = gen_rtx_VEC_CONCAT (mode, x, x);
45798 emit_insn (gen_rtx_SET (target, x));
45799 }
45800 return true;
45801
45802 default:
45803 return false;
45804 }
45805 }
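
/* The "widen" case above replicates a scalar into the next wider scalar
   mode before recursing on the wider vector mode.  A minimal sketch of
   the same trick in plain C (illustration only; the sketch_* name is not
   part of GCC): turn a 16-bit value into a 32-bit word holding two
   copies of it.  */
static unsigned int
sketch_replicate_hi_to_si (unsigned short v)
{
  unsigned int w = v;			/* zero-extend, as convert_modes does */
  return w | (w << 16);			/* IOR with the shifted copy */
}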
45806
45807 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45808 whose ONE_VAR element is VAR, and other elements are zero. Return true
45809 if successful. */
45810
45811 static bool
45812 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
45813 rtx target, rtx var, int one_var)
45814 {
45815 machine_mode vsimode;
45816 rtx new_target;
45817 rtx x, tmp;
45818 bool use_vector_set = false;
45819
45820 switch (mode)
45821 {
45822 case V2DImode:
45823 /* For SSE4.1, we normally use vector set. But if the second
45824 element is zero and inter-unit moves are OK, we use movq
45825 instead. */
45826 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
45827 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
45828 && one_var == 0));
45829 break;
45830 case V16QImode:
45831 case V4SImode:
45832 case V4SFmode:
45833 use_vector_set = TARGET_SSE4_1;
45834 break;
45835 case V8HImode:
45836 use_vector_set = TARGET_SSE2;
45837 break;
45838 case V4HImode:
45839 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
45840 break;
45841 case V32QImode:
45842 case V16HImode:
45843 case V8SImode:
45844 case V8SFmode:
45845 case V4DFmode:
45846 use_vector_set = TARGET_AVX;
45847 break;
45848 case V4DImode:
45849 /* Use ix86_expand_vector_set in 64bit mode only. */
45850 use_vector_set = TARGET_AVX && TARGET_64BIT;
45851 break;
45852 default:
45853 break;
45854 }
45855
45856 if (use_vector_set)
45857 {
45858 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
45859 var = force_reg (GET_MODE_INNER (mode), var);
45860 ix86_expand_vector_set (mmx_ok, target, var, one_var);
45861 return true;
45862 }
45863
45864 switch (mode)
45865 {
45866 case V2SFmode:
45867 case V2SImode:
45868 if (!mmx_ok)
45869 return false;
45870 /* FALLTHRU */
45871
45872 case V2DFmode:
45873 case V2DImode:
45874 if (one_var != 0)
45875 return false;
45876 var = force_reg (GET_MODE_INNER (mode), var);
45877 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
45878 emit_insn (gen_rtx_SET (target, x));
45879 return true;
45880
45881 case V4SFmode:
45882 case V4SImode:
45883 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
45884 new_target = gen_reg_rtx (mode);
45885 else
45886 new_target = target;
45887 var = force_reg (GET_MODE_INNER (mode), var);
45888 x = gen_rtx_VEC_DUPLICATE (mode, var);
45889 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
45890 emit_insn (gen_rtx_SET (new_target, x));
45891 if (one_var != 0)
45892 {
45893 /* We need to shuffle the value to the correct position, so
45894 create a new pseudo to store the intermediate result. */
45895
45896 /* With SSE2, we can use the integer shuffle insns. */
45897 if (mode != V4SFmode && TARGET_SSE2)
45898 {
45899 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
45900 const1_rtx,
45901 GEN_INT (one_var == 1 ? 0 : 1),
45902 GEN_INT (one_var == 2 ? 0 : 1),
45903 GEN_INT (one_var == 3 ? 0 : 1)));
45904 if (target != new_target)
45905 emit_move_insn (target, new_target);
45906 return true;
45907 }
45908
45909 /* Otherwise convert the intermediate result to V4SFmode and
45910 use the SSE1 shuffle instructions. */
45911 if (mode != V4SFmode)
45912 {
45913 tmp = gen_reg_rtx (V4SFmode);
45914 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
45915 }
45916 else
45917 tmp = new_target;
45918
45919 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
45920 const1_rtx,
45921 GEN_INT (one_var == 1 ? 0 : 1),
45922 GEN_INT (one_var == 2 ? 0+4 : 1+4),
45923 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
45924
45925 if (mode != V4SFmode)
45926 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
45927 else if (tmp != target)
45928 emit_move_insn (target, tmp);
45929 }
45930 else if (target != new_target)
45931 emit_move_insn (target, new_target);
45932 return true;
45933
45934 case V8HImode:
45935 case V16QImode:
45936 vsimode = V4SImode;
45937 goto widen;
45938 case V4HImode:
45939 case V8QImode:
45940 if (!mmx_ok)
45941 return false;
45942 vsimode = V2SImode;
45943 goto widen;
45944 widen:
45945 if (one_var != 0)
45946 return false;
45947
45948 /* Zero extend the variable element to SImode and recurse. */
45949 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
45950
45951 x = gen_reg_rtx (vsimode);
45952 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
45953 var, one_var))
45954 gcc_unreachable ();
45955
45956 emit_move_insn (target, gen_lowpart (mode, x));
45957 return true;
45958
45959 default:
45960 return false;
45961 }
45962 }
45963
45964 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45965 consisting of the values in VALS. It is known that all elements
45966 except ONE_VAR are constants. Return true if successful. */
45967
45968 static bool
45969 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
45970 rtx target, rtx vals, int one_var)
45971 {
45972 rtx var = XVECEXP (vals, 0, one_var);
45973 machine_mode wmode;
45974 rtx const_vec, x;
45975
45976 const_vec = copy_rtx (vals);
45977 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
45978 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
45979
45980 switch (mode)
45981 {
45982 case V2DFmode:
45983 case V2DImode:
45984 case V2SFmode:
45985 case V2SImode:
45986 /* For the two element vectors, it's just as easy to use
45987 the general case. */
45988 return false;
45989
45990 case V4DImode:
45991 /* Use ix86_expand_vector_set in 64bit mode only. */
45992 if (!TARGET_64BIT)
45993 return false;
45994 case V4DFmode:
45995 case V8SFmode:
45996 case V8SImode:
45997 case V16HImode:
45998 case V32QImode:
45999 case V4SFmode:
46000 case V4SImode:
46001 case V8HImode:
46002 case V4HImode:
46003 break;
46004
46005 case V16QImode:
46006 if (TARGET_SSE4_1)
46007 break;
46008 wmode = V8HImode;
46009 goto widen;
46010 case V8QImode:
46011 wmode = V4HImode;
46012 goto widen;
46013 widen:
46014 /* There's no way to set one QImode entry easily. Combine
46015 the variable value with its adjacent constant value, and
46016 promote to an HImode set. */
46017 x = XVECEXP (vals, 0, one_var ^ 1);
46018 if (one_var & 1)
46019 {
46020 var = convert_modes (HImode, QImode, var, true);
46021 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
46022 NULL_RTX, 1, OPTAB_LIB_WIDEN);
46023 x = GEN_INT (INTVAL (x) & 0xff);
46024 }
46025 else
46026 {
46027 var = convert_modes (HImode, QImode, var, true);
46028 x = gen_int_mode (INTVAL (x) << 8, HImode);
46029 }
46030 if (x != const0_rtx)
46031 var = expand_simple_binop (HImode, IOR, var, x, var,
46032 1, OPTAB_LIB_WIDEN);
46033
46034 x = gen_reg_rtx (wmode);
46035 emit_move_insn (x, gen_lowpart (wmode, const_vec));
46036 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
46037
46038 emit_move_insn (target, gen_lowpart (mode, x));
46039 return true;
46040
46041 default:
46042 return false;
46043 }
46044
46045 emit_move_insn (target, const_vec);
46046 ix86_expand_vector_set (mmx_ok, target, var, one_var);
46047 return true;
46048 }
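
/* A sketch of the QImode byte-combining step above (illustration only;
   the sketch_* name is not part of GCC): the variable byte and its
   adjacent constant byte are packed into one HImode value, assuming the
   little-endian layout where the higher-indexed element occupies the
   high byte.  */
static unsigned short
sketch_combine_qi_pair (unsigned char var, unsigned char adj_const,
			int var_at_odd_index)
{
  return var_at_odd_index
	 ? (unsigned short) ((var << 8) | adj_const)
	 : (unsigned short) ((adj_const << 8) | var);
}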
46049
46050 /* A subroutine of ix86_expand_vector_init_general. Use vector
46051 concatenate to handle the most general case: all values variable,
46052 and none identical. */
46053
46054 static void
46055 ix86_expand_vector_init_concat (machine_mode mode,
46056 rtx target, rtx *ops, int n)
46057 {
46058 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
46059 rtx first[16], second[8], third[4];
46060 rtvec v;
46061 int i, j;
46062
46063 switch (n)
46064 {
46065 case 2:
46066 switch (mode)
46067 {
46068 case V16SImode:
46069 cmode = V8SImode;
46070 break;
46071 case V16SFmode:
46072 cmode = V8SFmode;
46073 break;
46074 case V8DImode:
46075 cmode = V4DImode;
46076 break;
46077 case V8DFmode:
46078 cmode = V4DFmode;
46079 break;
46080 case V8SImode:
46081 cmode = V4SImode;
46082 break;
46083 case V8SFmode:
46084 cmode = V4SFmode;
46085 break;
46086 case V4DImode:
46087 cmode = V2DImode;
46088 break;
46089 case V4DFmode:
46090 cmode = V2DFmode;
46091 break;
46092 case V4SImode:
46093 cmode = V2SImode;
46094 break;
46095 case V4SFmode:
46096 cmode = V2SFmode;
46097 break;
46098 case V2DImode:
46099 cmode = DImode;
46100 break;
46101 case V2SImode:
46102 cmode = SImode;
46103 break;
46104 case V2DFmode:
46105 cmode = DFmode;
46106 break;
46107 case V2SFmode:
46108 cmode = SFmode;
46109 break;
46110 default:
46111 gcc_unreachable ();
46112 }
46113
46114 if (!register_operand (ops[1], cmode))
46115 ops[1] = force_reg (cmode, ops[1]);
46116 if (!register_operand (ops[0], cmode))
46117 ops[0] = force_reg (cmode, ops[0]);
46118 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
46119 ops[1])));
46120 break;
46121
46122 case 4:
46123 switch (mode)
46124 {
46125 case V4DImode:
46126 cmode = V2DImode;
46127 break;
46128 case V4DFmode:
46129 cmode = V2DFmode;
46130 break;
46131 case V4SImode:
46132 cmode = V2SImode;
46133 break;
46134 case V4SFmode:
46135 cmode = V2SFmode;
46136 break;
46137 default:
46138 gcc_unreachable ();
46139 }
46140 goto half;
46141
46142 case 8:
46143 switch (mode)
46144 {
46145 case V8DImode:
46146 cmode = V2DImode;
46147 hmode = V4DImode;
46148 break;
46149 case V8DFmode:
46150 cmode = V2DFmode;
46151 hmode = V4DFmode;
46152 break;
46153 case V8SImode:
46154 cmode = V2SImode;
46155 hmode = V4SImode;
46156 break;
46157 case V8SFmode:
46158 cmode = V2SFmode;
46159 hmode = V4SFmode;
46160 break;
46161 default:
46162 gcc_unreachable ();
46163 }
46164 goto half;
46165
46166 case 16:
46167 switch (mode)
46168 {
46169 case V16SImode:
46170 cmode = V2SImode;
46171 hmode = V4SImode;
46172 gmode = V8SImode;
46173 break;
46174 case V16SFmode:
46175 cmode = V2SFmode;
46176 hmode = V4SFmode;
46177 gmode = V8SFmode;
46178 break;
46179 default:
46180 gcc_unreachable ();
46181 }
46182 goto half;
46183
46184 half:
46185 /* FIXME: We process inputs backward to help RA. PR 36222. */
46186 i = n - 1;
46187 j = (n >> 1) - 1;
46188 for (; i > 0; i -= 2, j--)
46189 {
46190 first[j] = gen_reg_rtx (cmode);
46191 v = gen_rtvec (2, ops[i - 1], ops[i]);
46192 ix86_expand_vector_init (false, first[j],
46193 gen_rtx_PARALLEL (cmode, v));
46194 }
46195
46196 n >>= 1;
46197 if (n > 4)
46198 {
46199 gcc_assert (hmode != VOIDmode);
46200 gcc_assert (gmode != VOIDmode);
46201 for (i = j = 0; i < n; i += 2, j++)
46202 {
46203 second[j] = gen_reg_rtx (hmode);
46204 ix86_expand_vector_init_concat (hmode, second [j],
46205 &first [i], 2);
46206 }
46207 n >>= 1;
46208 for (i = j = 0; i < n; i += 2, j++)
46209 {
46210 third[j] = gen_reg_rtx (gmode);
46211 ix86_expand_vector_init_concat (gmode, third[j],
46212 &second[i], 2);
46213 }
46214 n >>= 1;
46215 ix86_expand_vector_init_concat (mode, target, third, n);
46216 }
46217 else if (n > 2)
46218 {
46219 gcc_assert (hmode != VOIDmode);
46220 for (i = j = 0; i < n; i += 2, j++)
46221 {
46222 second[j] = gen_reg_rtx (hmode);
46223 ix86_expand_vector_init_concat (hmode, second [j],
46224 &first [i], 2);
46225 }
46226 n >>= 1;
46227 ix86_expand_vector_init_concat (mode, target, second, n);
46228 }
46229 else
46230 ix86_expand_vector_init_concat (mode, target, first, n);
46231 break;
46232
46233 default:
46234 gcc_unreachable ();
46235 }
46236 }
46237
46238 /* A subroutine of ix86_expand_vector_init_general. Use vector
46239 interleave to handle the most general case: all values variable,
46240 and none identical. */
46241
46242 static void
46243 ix86_expand_vector_init_interleave (machine_mode mode,
46244 rtx target, rtx *ops, int n)
46245 {
46246 machine_mode first_imode, second_imode, third_imode, inner_mode;
46247 int i, j;
46248 rtx op0, op1;
46249 rtx (*gen_load_even) (rtx, rtx, rtx);
46250 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
46251 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
46252
46253 switch (mode)
46254 {
46255 case V8HImode:
46256 gen_load_even = gen_vec_setv8hi;
46257 gen_interleave_first_low = gen_vec_interleave_lowv4si;
46258 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46259 inner_mode = HImode;
46260 first_imode = V4SImode;
46261 second_imode = V2DImode;
46262 third_imode = VOIDmode;
46263 break;
46264 case V16QImode:
46265 gen_load_even = gen_vec_setv16qi;
46266 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
46267 gen_interleave_second_low = gen_vec_interleave_lowv4si;
46268 inner_mode = QImode;
46269 first_imode = V8HImode;
46270 second_imode = V4SImode;
46271 third_imode = V2DImode;
46272 break;
46273 default:
46274 gcc_unreachable ();
46275 }
46276
46277 for (i = 0; i < n; i++)
46278 {
46279 /* Extend the odd element to SImode using a paradoxical SUBREG. */
46280 op0 = gen_reg_rtx (SImode);
46281 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
46282
46283 /* Insert the SImode value as low element of V4SImode vector. */
46284 op1 = gen_reg_rtx (V4SImode);
46285 op0 = gen_rtx_VEC_MERGE (V4SImode,
46286 gen_rtx_VEC_DUPLICATE (V4SImode,
46287 op0),
46288 CONST0_RTX (V4SImode),
46289 const1_rtx);
46290 emit_insn (gen_rtx_SET (op1, op0));
46291
46292 /* Cast the V4SImode vector back to a vector in the original mode. */
46293 op0 = gen_reg_rtx (mode);
46294 emit_move_insn (op0, gen_lowpart (mode, op1));
46295
46296 /* Load even elements into the second position. */
46297 emit_insn (gen_load_even (op0,
46298 force_reg (inner_mode,
46299 ops [i + i + 1]),
46300 const1_rtx));
46301
46302 /* Cast vector to FIRST_IMODE vector. */
46303 ops[i] = gen_reg_rtx (first_imode);
46304 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
46305 }
46306
46307 /* Interleave low FIRST_IMODE vectors. */
46308 for (i = j = 0; i < n; i += 2, j++)
46309 {
46310 op0 = gen_reg_rtx (first_imode);
46311 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
46312
46313 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
46314 ops[j] = gen_reg_rtx (second_imode);
46315 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
46316 }
46317
46318 /* Interleave low SECOND_IMODE vectors. */
46319 switch (second_imode)
46320 {
46321 case V4SImode:
46322 for (i = j = 0; i < n / 2; i += 2, j++)
46323 {
46324 op0 = gen_reg_rtx (second_imode);
46325 emit_insn (gen_interleave_second_low (op0, ops[i],
46326 ops[i + 1]));
46327
46328 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
46329 vector. */
46330 ops[j] = gen_reg_rtx (third_imode);
46331 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
46332 }
46333 second_imode = V2DImode;
46334 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46335 /* FALLTHRU */
46336
46337 case V2DImode:
46338 op0 = gen_reg_rtx (second_imode);
46339 emit_insn (gen_interleave_second_low (op0, ops[0],
46340 ops[1]));
46341
46342 /* Cast the SECOND_IMODE vector back to a vector in the original
46343 mode. */
46344 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
46345 break;
46346
46347 default:
46348 gcc_unreachable ();
46349 }
46350 }
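
/* A scalar model of the "interleave low" (punpckl*) step used above
   (illustration only; the sketch_* name is not part of GCC): the low
   halves of A and B are merged by alternating their elements.  */
static void
sketch_interleave_low (unsigned short *out, const unsigned short *a,
		       const unsigned short *b, int nelts)
{
  int i;
  for (i = 0; i < nelts / 2; i++)
    {
      out[2 * i] = a[i];
      out[2 * i + 1] = b[i];
    }
}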
46351
46352 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
46353 all values variable, and none identical. */
46354
46355 static void
46356 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
46357 rtx target, rtx vals)
46358 {
46359 rtx ops[64], op0, op1, op2, op3, op4, op5;
46360 machine_mode half_mode = VOIDmode;
46361 machine_mode quarter_mode = VOIDmode;
46362 int n, i;
46363
46364 switch (mode)
46365 {
46366 case V2SFmode:
46367 case V2SImode:
46368 if (!mmx_ok && !TARGET_SSE)
46369 break;
46370 /* FALLTHRU */
46371
46372 case V16SImode:
46373 case V16SFmode:
46374 case V8DFmode:
46375 case V8DImode:
46376 case V8SFmode:
46377 case V8SImode:
46378 case V4DFmode:
46379 case V4DImode:
46380 case V4SFmode:
46381 case V4SImode:
46382 case V2DFmode:
46383 case V2DImode:
46384 n = GET_MODE_NUNITS (mode);
46385 for (i = 0; i < n; i++)
46386 ops[i] = XVECEXP (vals, 0, i);
46387 ix86_expand_vector_init_concat (mode, target, ops, n);
46388 return;
46389
46390 case V32QImode:
46391 half_mode = V16QImode;
46392 goto half;
46393
46394 case V16HImode:
46395 half_mode = V8HImode;
46396 goto half;
46397
46398 half:
46399 n = GET_MODE_NUNITS (mode);
46400 for (i = 0; i < n; i++)
46401 ops[i] = XVECEXP (vals, 0, i);
46402 op0 = gen_reg_rtx (half_mode);
46403 op1 = gen_reg_rtx (half_mode);
46404 ix86_expand_vector_init_interleave (half_mode, op0, ops,
46405 n >> 2);
46406 ix86_expand_vector_init_interleave (half_mode, op1,
46407 &ops [n >> 1], n >> 2);
46408 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
46409 return;
46410
46411 case V64QImode:
46412 quarter_mode = V16QImode;
46413 half_mode = V32QImode;
46414 goto quarter;
46415
46416 case V32HImode:
46417 quarter_mode = V8HImode;
46418 half_mode = V16HImode;
46419 goto quarter;
46420
46421 quarter:
46422 n = GET_MODE_NUNITS (mode);
46423 for (i = 0; i < n; i++)
46424 ops[i] = XVECEXP (vals, 0, i);
46425 op0 = gen_reg_rtx (quarter_mode);
46426 op1 = gen_reg_rtx (quarter_mode);
46427 op2 = gen_reg_rtx (quarter_mode);
46428 op3 = gen_reg_rtx (quarter_mode);
46429 op4 = gen_reg_rtx (half_mode);
46430 op5 = gen_reg_rtx (half_mode);
46431 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
46432 n >> 3);
46433 ix86_expand_vector_init_interleave (quarter_mode, op1,
46434 &ops [n >> 2], n >> 3);
46435 ix86_expand_vector_init_interleave (quarter_mode, op2,
46436 &ops [n >> 1], n >> 3);
46437 ix86_expand_vector_init_interleave (quarter_mode, op3,
46438 &ops [(n >> 1) | (n >> 2)], n >> 3);
46439 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
46440 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
46441 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
46442 return;
46443
46444 case V16QImode:
46445 if (!TARGET_SSE4_1)
46446 break;
46447 /* FALLTHRU */
46448
46449 case V8HImode:
46450 if (!TARGET_SSE2)
46451 break;
46452
46453 /* Don't use ix86_expand_vector_init_interleave if we can't
46454 move from GPR to SSE register directly. */
46455 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
46456 break;
46457
46458 n = GET_MODE_NUNITS (mode);
46459 for (i = 0; i < n; i++)
46460 ops[i] = XVECEXP (vals, 0, i);
46461 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
46462 return;
46463
46464 case V4HImode:
46465 case V8QImode:
46466 break;
46467
46468 default:
46469 gcc_unreachable ();
46470 }
46471
46472 {
46473 int i, j, n_elts, n_words, n_elt_per_word;
46474 machine_mode inner_mode;
46475 rtx words[4], shift;
46476
46477 inner_mode = GET_MODE_INNER (mode);
46478 n_elts = GET_MODE_NUNITS (mode);
46479 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46480 n_elt_per_word = n_elts / n_words;
46481 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
46482
46483 for (i = 0; i < n_words; ++i)
46484 {
46485 rtx word = NULL_RTX;
46486
46487 for (j = 0; j < n_elt_per_word; ++j)
46488 {
46489 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
46490 elt = convert_modes (word_mode, inner_mode, elt, true);
46491
46492 if (j == 0)
46493 word = elt;
46494 else
46495 {
46496 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
46497 word, 1, OPTAB_LIB_WIDEN);
46498 word = expand_simple_binop (word_mode, IOR, word, elt,
46499 word, 1, OPTAB_LIB_WIDEN);
46500 }
46501 }
46502
46503 words[i] = word;
46504 }
46505
46506 if (n_words == 1)
46507 emit_move_insn (target, gen_lowpart (mode, words[0]));
46508 else if (n_words == 2)
46509 {
46510 rtx tmp = gen_reg_rtx (mode);
46511 emit_clobber (tmp);
46512 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
46513 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
46514 emit_move_insn (target, tmp);
46515 }
46516 else if (n_words == 4)
46517 {
46518 rtx tmp = gen_reg_rtx (V4SImode);
46519 gcc_assert (word_mode == SImode);
46520 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
46521 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
46522 emit_move_insn (target, gen_lowpart (mode, tmp));
46523 }
46524 else
46525 gcc_unreachable ();
46526 }
46527 }
46528
46529 /* Initialize vector TARGET via VALS. Suppress the use of MMX
46530 instructions unless MMX_OK is true. */
46531
46532 void
46533 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
46534 {
46535 machine_mode mode = GET_MODE (target);
46536 machine_mode inner_mode = GET_MODE_INNER (mode);
46537 int n_elts = GET_MODE_NUNITS (mode);
46538 int n_var = 0, one_var = -1;
46539 bool all_same = true, all_const_zero = true;
46540 int i;
46541 rtx x;
46542
46543 for (i = 0; i < n_elts; ++i)
46544 {
46545 x = XVECEXP (vals, 0, i);
46546 if (!(CONST_SCALAR_INT_P (x)
46547 || CONST_DOUBLE_P (x)
46548 || CONST_FIXED_P (x)))
46549 n_var++, one_var = i;
46550 else if (x != CONST0_RTX (inner_mode))
46551 all_const_zero = false;
46552 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
46553 all_same = false;
46554 }
46555
46556 /* Constants are best loaded from the constant pool. */
46557 if (n_var == 0)
46558 {
46559 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
46560 return;
46561 }
46562
46563 /* If all values are identical, broadcast the value. */
46564 if (all_same
46565 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
46566 XVECEXP (vals, 0, 0)))
46567 return;
46568
46569 /* Values where only one field is non-constant are best loaded from
46570 the pool and overwritten via move later. */
46571 if (n_var == 1)
46572 {
46573 if (all_const_zero
46574 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
46575 XVECEXP (vals, 0, one_var),
46576 one_var))
46577 return;
46578
46579 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
46580 return;
46581 }
46582
46583 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
46584 }
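
/* At the source level the cases dispatched above correspond, roughly, to
   initializers like these (GNU C vector extension; the sketch_* names are
   illustrative only and not part of GCC).  */
typedef int sketch_v4si __attribute__ ((vector_size (16)));

static sketch_v4si
sketch_vector_init_cases (int a, int b, int c, int d)
{
  sketch_v4si all_const = { 1, 2, 3, 4 };	/* n_var == 0: constant pool */
  sketch_v4si broadcast = { a, a, a, a };	/* all_same: duplicate path */
  sketch_v4si one_var   = { a, 0, 0, 0 };	/* one non-constant element */
  sketch_v4si general   = { a, b, c, d };	/* fully variable: general case */
  return all_const + broadcast + one_var + general;
}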
46585
46586 void
46587 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
46588 {
46589 machine_mode mode = GET_MODE (target);
46590 machine_mode inner_mode = GET_MODE_INNER (mode);
46591 machine_mode half_mode;
46592 bool use_vec_merge = false;
46593 rtx tmp;
46594 static rtx (*gen_extract[6][2]) (rtx, rtx)
46595 = {
46596 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
46597 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
46598 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
46599 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
46600 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
46601 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
46602 };
46603 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
46604 = {
46605 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
46606 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
46607 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
46608 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
46609 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
46610 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
46611 };
46612 int i, j, n;
46613 machine_mode mmode = VOIDmode;
46614 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
46615
46616 switch (mode)
46617 {
46618 case V2SFmode:
46619 case V2SImode:
46620 if (mmx_ok)
46621 {
46622 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46623 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
46624 if (elt == 0)
46625 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46626 else
46627 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46628 emit_insn (gen_rtx_SET (target, tmp));
46629 return;
46630 }
46631 break;
46632
46633 case V2DImode:
46634 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
46635 if (use_vec_merge)
46636 break;
46637
46638 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46639 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
46640 if (elt == 0)
46641 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46642 else
46643 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46644 emit_insn (gen_rtx_SET (target, tmp));
46645 return;
46646
46647 case V2DFmode:
46648 {
46649 rtx op0, op1;
46650
46651 /* For the two element vectors, we implement a VEC_CONCAT with
46652 the extraction of the other element. */
46653
46654 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
46655 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
46656
46657 if (elt == 0)
46658 op0 = val, op1 = tmp;
46659 else
46660 op0 = tmp, op1 = val;
46661
46662 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
46663 emit_insn (gen_rtx_SET (target, tmp));
46664 }
46665 return;
46666
46667 case V4SFmode:
46668 use_vec_merge = TARGET_SSE4_1;
46669 if (use_vec_merge)
46670 break;
46671
46672 switch (elt)
46673 {
46674 case 0:
46675 use_vec_merge = true;
46676 break;
46677
46678 case 1:
46679 /* tmp = target = A B C D */
46680 tmp = copy_to_reg (target);
46681 /* target = A A B B */
46682 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
46683 /* target = X A B B */
46684 ix86_expand_vector_set (false, target, val, 0);
46685 /* target = A X C D */
46686 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46687 const1_rtx, const0_rtx,
46688 GEN_INT (2+4), GEN_INT (3+4)));
46689 return;
46690
46691 case 2:
46692 /* tmp = target = A B C D */
46693 tmp = copy_to_reg (target);
46694 /* tmp = X B C D */
46695 ix86_expand_vector_set (false, tmp, val, 0);
46696 /* target = A B X D */
46697 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46698 const0_rtx, const1_rtx,
46699 GEN_INT (0+4), GEN_INT (3+4)));
46700 return;
46701
46702 case 3:
46703 /* tmp = target = A B C D */
46704 tmp = copy_to_reg (target);
46705 /* tmp = X B C D */
46706 ix86_expand_vector_set (false, tmp, val, 0);
46707 /* target = A B C X */
46708 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46709 const0_rtx, const1_rtx,
46710 GEN_INT (2+4), GEN_INT (0+4)));
46711 return;
46712
46713 default:
46714 gcc_unreachable ();
46715 }
46716 break;
46717
46718 case V4SImode:
46719 use_vec_merge = TARGET_SSE4_1;
46720 if (use_vec_merge)
46721 break;
46722
46723 /* Element 0 handled by vec_merge below. */
46724 if (elt == 0)
46725 {
46726 use_vec_merge = true;
46727 break;
46728 }
46729
46730 if (TARGET_SSE2)
46731 {
46732 /* With SSE2, use integer shuffles to swap element 0 and ELT,
46733 store into element 0, then shuffle them back. */
46734
46735 rtx order[4];
46736
46737 order[0] = GEN_INT (elt);
46738 order[1] = const1_rtx;
46739 order[2] = const2_rtx;
46740 order[3] = GEN_INT (3);
46741 order[elt] = const0_rtx;
46742
46743 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46744 order[1], order[2], order[3]));
46745
46746 ix86_expand_vector_set (false, target, val, 0);
46747
46748 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46749 order[1], order[2], order[3]));
46750 }
46751 else
46752 {
46753 /* For SSE1, we have to reuse the V4SF code. */
46754 rtx t = gen_reg_rtx (V4SFmode);
46755 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
46756 emit_move_insn (target, gen_lowpart (mode, t));
46757 }
46758 return;
46759
46760 case V8HImode:
46761 use_vec_merge = TARGET_SSE2;
46762 break;
46763 case V4HImode:
46764 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
46765 break;
46766
46767 case V16QImode:
46768 use_vec_merge = TARGET_SSE4_1;
46769 break;
46770
46771 case V8QImode:
46772 break;
46773
46774 case V32QImode:
46775 half_mode = V16QImode;
46776 j = 0;
46777 n = 16;
46778 goto half;
46779
46780 case V16HImode:
46781 half_mode = V8HImode;
46782 j = 1;
46783 n = 8;
46784 goto half;
46785
46786 case V8SImode:
46787 half_mode = V4SImode;
46788 j = 2;
46789 n = 4;
46790 goto half;
46791
46792 case V4DImode:
46793 half_mode = V2DImode;
46794 j = 3;
46795 n = 2;
46796 goto half;
46797
46798 case V8SFmode:
46799 half_mode = V4SFmode;
46800 j = 4;
46801 n = 4;
46802 goto half;
46803
46804 case V4DFmode:
46805 half_mode = V2DFmode;
46806 j = 5;
46807 n = 2;
46808 goto half;
46809
46810 half:
46811 /* Compute offset. */
46812 i = elt / n;
46813 elt %= n;
46814
46815 gcc_assert (i <= 1);
46816
46817 /* Extract the half. */
46818 tmp = gen_reg_rtx (half_mode);
46819 emit_insn (gen_extract[j][i] (tmp, target));
46820
46821 /* Put val in tmp at elt. */
46822 ix86_expand_vector_set (false, tmp, val, elt);
46823
46824 /* Put it back. */
46825 emit_insn (gen_insert[j][i] (target, target, tmp));
46826 return;
46827
46828 case V8DFmode:
46829 if (TARGET_AVX512F)
46830 {
46831 mmode = QImode;
46832 gen_blendm = gen_avx512f_blendmv8df;
46833 }
46834 break;
46835
46836 case V8DImode:
46837 if (TARGET_AVX512F)
46838 {
46839 mmode = QImode;
46840 gen_blendm = gen_avx512f_blendmv8di;
46841 }
46842 break;
46843
46844 case V16SFmode:
46845 if (TARGET_AVX512F)
46846 {
46847 mmode = HImode;
46848 gen_blendm = gen_avx512f_blendmv16sf;
46849 }
46850 break;
46851
46852 case V16SImode:
46853 if (TARGET_AVX512F)
46854 {
46855 mmode = HImode;
46856 gen_blendm = gen_avx512f_blendmv16si;
46857 }
46858 break;
46859
46860 case V32HImode:
46861 if (TARGET_AVX512F && TARGET_AVX512BW)
46862 {
46863 mmode = SImode;
46864 gen_blendm = gen_avx512bw_blendmv32hi;
46865 }
46866 break;
46867
46868 case V64QImode:
46869 if (TARGET_AVX512F && TARGET_AVX512BW)
46870 {
46871 mmode = DImode;
46872 gen_blendm = gen_avx512bw_blendmv64qi;
46873 }
46874 break;
46875
46876 default:
46877 break;
46878 }
46879
46880 if (mmode != VOIDmode)
46881 {
46882 tmp = gen_reg_rtx (mode);
46883 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
46884 emit_insn (gen_blendm (target, tmp, target,
46885 force_reg (mmode,
46886 gen_int_mode (1 << elt, mmode))));
46887 }
46888 else if (use_vec_merge)
46889 {
46890 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
46891 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
46892 emit_insn (gen_rtx_SET (target, tmp));
46893 }
46894 else
46895 {
46896 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
46897
46898 emit_move_insn (mem, target);
46899
46900 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
46901 emit_move_insn (tmp, val);
46902
46903 emit_move_insn (target, mem);
46904 }
46905 }
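
/* The final fallback above goes through a stack temporary; the same idea
   in plain C (illustration only; the sketch_* names are not part of GCC).  */
typedef int sketch_v4si_set __attribute__ ((vector_size (16)));

static sketch_v4si_set
sketch_vector_set_via_memory (sketch_v4si_set v, int val, int elt)
{
  int tmp[4];
  __builtin_memcpy (tmp, &v, sizeof tmp);	/* spill the vector */
  tmp[elt] = val;				/* overwrite one element */
  __builtin_memcpy (&v, tmp, sizeof tmp);	/* reload the whole vector */
  return v;
}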
46906
46907 void
46908 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
46909 {
46910 machine_mode mode = GET_MODE (vec);
46911 machine_mode inner_mode = GET_MODE_INNER (mode);
46912 bool use_vec_extr = false;
46913 rtx tmp;
46914
46915 switch (mode)
46916 {
46917 case V2SImode:
46918 case V2SFmode:
46919 if (!mmx_ok)
46920 break;
46921 /* FALLTHRU */
46922
46923 case V2DFmode:
46924 case V2DImode:
46925 use_vec_extr = true;
46926 break;
46927
46928 case V4SFmode:
46929 use_vec_extr = TARGET_SSE4_1;
46930 if (use_vec_extr)
46931 break;
46932
46933 switch (elt)
46934 {
46935 case 0:
46936 tmp = vec;
46937 break;
46938
46939 case 1:
46940 case 3:
46941 tmp = gen_reg_rtx (mode);
46942 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
46943 GEN_INT (elt), GEN_INT (elt),
46944 GEN_INT (elt+4), GEN_INT (elt+4)));
46945 break;
46946
46947 case 2:
46948 tmp = gen_reg_rtx (mode);
46949 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
46950 break;
46951
46952 default:
46953 gcc_unreachable ();
46954 }
46955 vec = tmp;
46956 use_vec_extr = true;
46957 elt = 0;
46958 break;
46959
46960 case V4SImode:
46961 use_vec_extr = TARGET_SSE4_1;
46962 if (use_vec_extr)
46963 break;
46964
46965 if (TARGET_SSE2)
46966 {
46967 switch (elt)
46968 {
46969 case 0:
46970 tmp = vec;
46971 break;
46972
46973 case 1:
46974 case 3:
46975 tmp = gen_reg_rtx (mode);
46976 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
46977 GEN_INT (elt), GEN_INT (elt),
46978 GEN_INT (elt), GEN_INT (elt)));
46979 break;
46980
46981 case 2:
46982 tmp = gen_reg_rtx (mode);
46983 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
46984 break;
46985
46986 default:
46987 gcc_unreachable ();
46988 }
46989 vec = tmp;
46990 use_vec_extr = true;
46991 elt = 0;
46992 }
46993 else
46994 {
46995 /* For SSE1, we have to reuse the V4SF code. */
46996 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
46997 gen_lowpart (V4SFmode, vec), elt);
46998 return;
46999 }
47000 break;
47001
47002 case V8HImode:
47003 use_vec_extr = TARGET_SSE2;
47004 break;
47005 case V4HImode:
47006 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
47007 break;
47008
47009 case V16QImode:
47010 use_vec_extr = TARGET_SSE4_1;
47011 break;
47012
47013 case V8SFmode:
47014 if (TARGET_AVX)
47015 {
47016 tmp = gen_reg_rtx (V4SFmode);
47017 if (elt < 4)
47018 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
47019 else
47020 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
47021 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47022 return;
47023 }
47024 break;
47025
47026 case V4DFmode:
47027 if (TARGET_AVX)
47028 {
47029 tmp = gen_reg_rtx (V2DFmode);
47030 if (elt < 2)
47031 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
47032 else
47033 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
47034 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47035 return;
47036 }
47037 break;
47038
47039 case V32QImode:
47040 if (TARGET_AVX)
47041 {
47042 tmp = gen_reg_rtx (V16QImode);
47043 if (elt < 16)
47044 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
47045 else
47046 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
47047 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47048 return;
47049 }
47050 break;
47051
47052 case V16HImode:
47053 if (TARGET_AVX)
47054 {
47055 tmp = gen_reg_rtx (V8HImode);
47056 if (elt < 8)
47057 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
47058 else
47059 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
47060 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47061 return;
47062 }
47063 break;
47064
47065 case V8SImode:
47066 if (TARGET_AVX)
47067 {
47068 tmp = gen_reg_rtx (V4SImode);
47069 if (elt < 4)
47070 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
47071 else
47072 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
47073 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47074 return;
47075 }
47076 break;
47077
47078 case V4DImode:
47079 if (TARGET_AVX)
47080 {
47081 tmp = gen_reg_rtx (V2DImode);
47082 if (elt < 2)
47083 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
47084 else
47085 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
47086 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47087 return;
47088 }
47089 break;
47090
47091 case V32HImode:
47092 if (TARGET_AVX512BW)
47093 {
47094 tmp = gen_reg_rtx (V16HImode);
47095 if (elt < 16)
47096 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
47097 else
47098 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
47099 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47100 return;
47101 }
47102 break;
47103
47104 case V64QImode:
47105 if (TARGET_AVX512BW)
47106 {
47107 tmp = gen_reg_rtx (V32QImode);
47108 if (elt < 32)
47109 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
47110 else
47111 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
47112 ix86_expand_vector_extract (false, target, tmp, elt & 31);
47113 return;
47114 }
47115 break;
47116
47117 case V16SFmode:
47118 tmp = gen_reg_rtx (V8SFmode);
47119 if (elt < 8)
47120 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
47121 else
47122 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
47123 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47124 return;
47125
47126 case V8DFmode:
47127 tmp = gen_reg_rtx (V4DFmode);
47128 if (elt < 4)
47129 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
47130 else
47131 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
47132 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47133 return;
47134
47135 case V16SImode:
47136 tmp = gen_reg_rtx (V8SImode);
47137 if (elt < 8)
47138 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
47139 else
47140 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
47141 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47142 return;
47143
47144 case V8DImode:
47145 tmp = gen_reg_rtx (V4DImode);
47146 if (elt < 4)
47147 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
47148 else
47149 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
47150 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47151 return;
47152
47153 case V8QImode:
47154 /* ??? Could extract the appropriate HImode element and shift. */
47155 default:
47156 break;
47157 }
47158
47159 if (use_vec_extr)
47160 {
47161 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
47162 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
47163
47164 /* Let the rtl optimizers know about the zero extension performed. */
47165 if (inner_mode == QImode || inner_mode == HImode)
47166 {
47167 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
47168 target = gen_lowpart (SImode, target);
47169 }
47170
47171 emit_insn (gen_rtx_SET (target, tmp));
47172 }
47173 else
47174 {
47175 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
47176
47177 emit_move_insn (mem, vec);
47178
47179 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
47180 emit_move_insn (target, tmp);
47181 }
47182 }
47183
47184 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
47185 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
47186 The upper bits of DEST are undefined, though they shouldn't cause
47187 exceptions (some bits from src or all zeros are ok). */
47188
47189 static void
47190 emit_reduc_half (rtx dest, rtx src, int i)
47191 {
47192 rtx tem, d = dest;
47193 switch (GET_MODE (src))
47194 {
47195 case V4SFmode:
47196 if (i == 128)
47197 tem = gen_sse_movhlps (dest, src, src);
47198 else
47199 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
47200 GEN_INT (1 + 4), GEN_INT (1 + 4));
47201 break;
47202 case V2DFmode:
47203 tem = gen_vec_interleave_highv2df (dest, src, src);
47204 break;
47205 case V16QImode:
47206 case V8HImode:
47207 case V4SImode:
47208 case V2DImode:
47209 d = gen_reg_rtx (V1TImode);
47210 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
47211 GEN_INT (i / 2));
47212 break;
47213 case V8SFmode:
47214 if (i == 256)
47215 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
47216 else
47217 tem = gen_avx_shufps256 (dest, src, src,
47218 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
47219 break;
47220 case V4DFmode:
47221 if (i == 256)
47222 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
47223 else
47224 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
47225 break;
47226 case V32QImode:
47227 case V16HImode:
47228 case V8SImode:
47229 case V4DImode:
47230 if (i == 256)
47231 {
47232 if (GET_MODE (dest) != V4DImode)
47233 d = gen_reg_rtx (V4DImode);
47234 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
47235 gen_lowpart (V4DImode, src),
47236 const1_rtx);
47237 }
47238 else
47239 {
47240 d = gen_reg_rtx (V2TImode);
47241 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
47242 GEN_INT (i / 2));
47243 }
47244 break;
47245 case V64QImode:
47246 case V32HImode:
47247 case V16SImode:
47248 case V16SFmode:
47249 case V8DImode:
47250 case V8DFmode:
47251 if (i > 128)
47252 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
47253 gen_lowpart (V16SImode, src),
47254 gen_lowpart (V16SImode, src),
47255 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
47256 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
47257 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
47258 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
47259 GEN_INT (0xC), GEN_INT (0xD),
47260 GEN_INT (0xE), GEN_INT (0xF),
47261 GEN_INT (0x10), GEN_INT (0x11),
47262 GEN_INT (0x12), GEN_INT (0x13),
47263 GEN_INT (0x14), GEN_INT (0x15),
47264 GEN_INT (0x16), GEN_INT (0x17));
47265 else
47266 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
47267 gen_lowpart (V16SImode, src),
47268 GEN_INT (i == 128 ? 0x2 : 0x1),
47269 GEN_INT (0x3),
47270 GEN_INT (0x3),
47271 GEN_INT (0x3),
47272 GEN_INT (i == 128 ? 0x6 : 0x5),
47273 GEN_INT (0x7),
47274 GEN_INT (0x7),
47275 GEN_INT (0x7),
47276 GEN_INT (i == 128 ? 0xA : 0x9),
47277 GEN_INT (0xB),
47278 GEN_INT (0xB),
47279 GEN_INT (0xB),
47280 GEN_INT (i == 128 ? 0xE : 0xD),
47281 GEN_INT (0xF),
47282 GEN_INT (0xF),
47283 GEN_INT (0xF));
47284 break;
47285 default:
47286 gcc_unreachable ();
47287 }
47288 emit_insn (tem);
47289 if (d != dest)
47290 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
47291 }
47292
47293 /* Expand a vector reduction. FN is the binary pattern to reduce;
47294 DEST is the destination; IN is the input vector. */
47295
47296 void
47297 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
47298 {
47299 rtx half, dst, vec = in;
47300 machine_mode mode = GET_MODE (in);
47301 int i;
47302
47303 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
47304 if (TARGET_SSE4_1
47305 && mode == V8HImode
47306 && fn == gen_uminv8hi3)
47307 {
47308 emit_insn (gen_sse4_1_phminposuw (dest, in));
47309 return;
47310 }
47311
47312 for (i = GET_MODE_BITSIZE (mode);
47313 i > GET_MODE_UNIT_BITSIZE (mode);
47314 i >>= 1)
47315 {
47316 half = gen_reg_rtx (mode);
47317 emit_reduc_half (half, vec, i);
47318 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
47319 dst = dest;
47320 else
47321 dst = gen_reg_rtx (mode);
47322 emit_insn (fn (dst, half, vec));
47323 vec = dst;
47324 }
47325 }
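
/* The loop above folds the vector in half log2(N) times.  A sketch of the
   same reduction over a plain array, here for a sum (illustration only;
   the sketch_* name is not part of GCC; NELTS must be a power of two).  */
static float
sketch_halving_reduction (float *v, int nelts)
{
  int width, i;
  for (width = nelts / 2; width >= 1; width /= 2)
    for (i = 0; i < width; i++)
      v[i] = v[i] + v[i + width];	/* emit_reduc_half + fn () step */
  return v[0];				/* result ends up in element 0 */
}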
47326 \f
47327 /* Target hook for scalar_mode_supported_p. */
47328 static bool
47329 ix86_scalar_mode_supported_p (machine_mode mode)
47330 {
47331 if (DECIMAL_FLOAT_MODE_P (mode))
47332 return default_decimal_float_supported_p ();
47333 else if (mode == TFmode)
47334 return true;
47335 else
47336 return default_scalar_mode_supported_p (mode);
47337 }
47338
47339 /* Implements target hook vector_mode_supported_p. */
47340 static bool
47341 ix86_vector_mode_supported_p (machine_mode mode)
47342 {
47343 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
47344 return true;
47345 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
47346 return true;
47347 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
47348 return true;
47349 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
47350 return true;
47351 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
47352 return true;
47353 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
47354 return true;
47355 return false;
47356 }
47357
47358 /* Implement target hook libgcc_floating_mode_supported_p. */
47359 static bool
47360 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
47361 {
47362 switch (mode)
47363 {
47364 case SFmode:
47365 case DFmode:
47366 case XFmode:
47367 return true;
47368
47369 case TFmode:
47370 #ifdef IX86_NO_LIBGCC_TFMODE
47371 return false;
47372 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
47373 return TARGET_LONG_DOUBLE_128;
47374 #else
47375 return true;
47376 #endif
47377
47378 default:
47379 return false;
47380 }
47381 }
47382
47383 /* Target hook for c_mode_for_suffix. */
47384 static machine_mode
47385 ix86_c_mode_for_suffix (char suffix)
47386 {
47387 if (suffix == 'q')
47388 return TFmode;
47389 if (suffix == 'w')
47390 return XFmode;
47391
47392 return VOIDmode;
47393 }
47394
47395 /* Worker function for TARGET_MD_ASM_ADJUST.
47396
47397 We implement asm flag outputs, and maintain source compatibility
47398 with the old cc0-based compiler. */
47399
47400 static rtx_insn *
47401 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
47402 vec<const char *> &constraints,
47403 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
47404 {
47405 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
47406 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
47407
47408 bool saw_asm_flag = false;
47409
47410 start_sequence ();
47411 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
47412 {
47413 const char *con = constraints[i];
47414 if (strncmp (con, "=@cc", 4) != 0)
47415 continue;
47416 con += 4;
47417 if (strchr (con, ',') != NULL)
47418 {
47419 error ("alternatives not allowed in asm flag output");
47420 continue;
47421 }
47422
47423 bool invert = false;
47424 if (con[0] == 'n')
47425 invert = true, con++;
47426
47427 machine_mode mode = CCmode;
47428 rtx_code code = UNKNOWN;
47429
47430 switch (con[0])
47431 {
47432 case 'a':
47433 if (con[1] == 0)
47434 mode = CCAmode, code = EQ;
47435 else if (con[1] == 'e' && con[2] == 0)
47436 mode = CCCmode, code = NE;
47437 break;
47438 case 'b':
47439 if (con[1] == 0)
47440 mode = CCCmode, code = EQ;
47441 else if (con[1] == 'e' && con[2] == 0)
47442 mode = CCAmode, code = NE;
47443 break;
47444 case 'c':
47445 if (con[1] == 0)
47446 mode = CCCmode, code = EQ;
47447 break;
47448 case 'e':
47449 if (con[1] == 0)
47450 mode = CCZmode, code = EQ;
47451 break;
47452 case 'g':
47453 if (con[1] == 0)
47454 mode = CCGCmode, code = GT;
47455 else if (con[1] == 'e' && con[2] == 0)
47456 mode = CCGCmode, code = GE;
47457 break;
47458 case 'l':
47459 if (con[1] == 0)
47460 mode = CCGCmode, code = LT;
47461 else if (con[1] == 'e' && con[2] == 0)
47462 mode = CCGCmode, code = LE;
47463 break;
47464 case 'o':
47465 if (con[1] == 0)
47466 mode = CCOmode, code = EQ;
47467 break;
47468 case 'p':
47469 if (con[1] == 0)
47470 mode = CCPmode, code = EQ;
47471 break;
47472 case 's':
47473 if (con[1] == 0)
47474 mode = CCSmode, code = EQ;
47475 break;
47476 case 'z':
47477 if (con[1] == 0)
47478 mode = CCZmode, code = EQ;
47479 break;
47480 }
47481 if (code == UNKNOWN)
47482 {
47483 error ("unknown asm flag output %qs", constraints[i]);
47484 continue;
47485 }
47486 if (invert)
47487 code = reverse_condition (code);
47488
47489 rtx dest = outputs[i];
47490 if (!saw_asm_flag)
47491 {
47492 /* This is the first asm flag output. Here we put the flags
47493 register in as the real output and adjust the condition to
47494 allow it. */
47495 constraints[i] = "=Bf";
47496 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
47497 saw_asm_flag = true;
47498 }
47499 else
47500 {
47501 /* We don't need the flags register as output twice. */
47502 constraints[i] = "=X";
47503 outputs[i] = gen_rtx_SCRATCH (SImode);
47504 }
47505
47506 rtx x = gen_rtx_REG (mode, FLAGS_REG);
47507 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
47508
47509 machine_mode dest_mode = GET_MODE (dest);
47510 if (!SCALAR_INT_MODE_P (dest_mode))
47511 {
47512 error ("invalid type for asm flag output");
47513 continue;
47514 }
47515
47516 if (dest_mode == DImode && !TARGET_64BIT)
47517 dest_mode = SImode;
47518
47519 if (dest_mode != QImode)
47520 {
47521 rtx destqi = gen_reg_rtx (QImode);
47522 emit_insn (gen_rtx_SET (destqi, x));
47523
47524 if (TARGET_ZERO_EXTEND_WITH_AND
47525 && optimize_function_for_speed_p (cfun))
47526 {
47527 x = force_reg (dest_mode, const0_rtx);
47528
47529 emit_insn (gen_movstrictqi
47530 (gen_lowpart (QImode, x), destqi));
47531 }
47532 else
47533 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
47534 }
47535
47536 if (dest_mode != GET_MODE (dest))
47537 {
47538 rtx tmp = gen_reg_rtx (SImode);
47539
47540 emit_insn (gen_rtx_SET (tmp, x));
47541 emit_insn (gen_zero_extendsidi2 (dest, tmp));
47542 }
47543 else
47544 emit_insn (gen_rtx_SET (dest, x));
47545 }
47546 rtx_insn *seq = get_insns ();
47547 end_sequence ();
47548
47549 if (saw_asm_flag)
47550 return seq;
47551 else
47552 {
47553 /* If we had no asm flag outputs, clobber the flags. */
47554 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
47555 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
47556 return NULL;
47557 }
47558 }
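
/* At the source level the flag outputs handled above look like this
   (illustration only; the sketch_* name is not part of GCC).  "=@ccz"
   requests the Z flag as a boolean output, so no explicit setcc/test
   pair is needed when the consumer is a branch.  */
static int
sketch_flags_equal (int a, int b)
{
  int eq;
  __asm__ ("cmpl %2, %1" : "=@ccz" (eq) : "r" (a), "r" (b));
  return eq;
}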
47559
47560 /* Implements the target hook targetm.asm.encode_section_info. */
47561
47562 static void ATTRIBUTE_UNUSED
47563 ix86_encode_section_info (tree decl, rtx rtl, int first)
47564 {
47565 default_encode_section_info (decl, rtl, first);
47566
47567 if (ix86_in_large_data_p (decl))
47568 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
47569 }
47570
47571 /* Worker function for REVERSE_CONDITION. */
47572
47573 enum rtx_code
47574 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
47575 {
47576 return (mode != CCFPmode && mode != CCFPUmode
47577 ? reverse_condition (code)
47578 : reverse_condition_maybe_unordered (code));
47579 }
47580
47581 /* Output code to perform an x87 FP register move, from OPERANDS[1]
47582 to OPERANDS[0]. */
47583
47584 const char *
47585 output_387_reg_move (rtx insn, rtx *operands)
47586 {
47587 if (REG_P (operands[0]))
47588 {
47589 if (REG_P (operands[1])
47590 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47591 {
47592 if (REGNO (operands[0]) == FIRST_STACK_REG)
47593 return output_387_ffreep (operands, 0);
47594 return "fstp\t%y0";
47595 }
47596 if (STACK_TOP_P (operands[0]))
47597 return "fld%Z1\t%y1";
47598 return "fst\t%y0";
47599 }
47600 else if (MEM_P (operands[0]))
47601 {
47602 gcc_assert (REG_P (operands[1]));
47603 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47604 return "fstp%Z0\t%y0";
47605 else
47606 {
47607 /* There is no non-popping store to memory for XFmode.
47608 So if we need one, follow the store with a load. */
47609 if (GET_MODE (operands[0]) == XFmode)
47610 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
47611 else
47612 return "fst%Z0\t%y0";
47613 }
47614 }
47615 else
47616 gcc_unreachable();
47617 }
47618
47619 /* Output code to perform a conditional jump to LABEL, if C2 flag in
47620 FP status register is set. */
47621
47622 void
47623 ix86_emit_fp_unordered_jump (rtx label)
47624 {
47625 rtx reg = gen_reg_rtx (HImode);
47626 rtx temp;
47627
47628 emit_insn (gen_x86_fnstsw_1 (reg));
47629
47630 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
47631 {
47632 emit_insn (gen_x86_sahf_1 (reg));
47633
47634 temp = gen_rtx_REG (CCmode, FLAGS_REG);
47635 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
47636 }
47637 else
47638 {
47639 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
47640
47641 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
47642 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
47643 }
47644
47645 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
47646 gen_rtx_LABEL_REF (VOIDmode, label),
47647 pc_rtx);
47648 temp = gen_rtx_SET (pc_rtx, temp);
47649
47650 emit_jump_insn (temp);
47651 predict_jump (REG_BR_PROB_BASE * 10 / 100);
47652 }
47653
47654 /* Output code to perform a log1p XFmode calculation. */
47655
47656 void ix86_emit_i387_log1p (rtx op0, rtx op1)
47657 {
47658 rtx_code_label *label1 = gen_label_rtx ();
47659 rtx_code_label *label2 = gen_label_rtx ();
47660
47661 rtx tmp = gen_reg_rtx (XFmode);
47662 rtx tmp2 = gen_reg_rtx (XFmode);
47663 rtx test;
47664
47665 emit_insn (gen_absxf2 (tmp, op1));
47666 test = gen_rtx_GE (VOIDmode, tmp,
47667 const_double_from_real_value (
47668 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
47669 XFmode));
47670 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
47671
47672 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47673 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
47674 emit_jump (label2);
47675
47676 emit_label (label1);
47677 emit_move_insn (tmp, CONST1_RTX (XFmode));
47678 emit_insn (gen_addxf3 (tmp, op1, tmp));
47679 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47680 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
47681
47682 emit_label (label2);
47683 }
47684
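/* Illustrative sketch (hypothetical, kept under #if 0): the split the
   sequence above implements.  The cutoff 0.29289321881... is assumed to
   be 1 - sqrt(2)/2, the bound up to which the x87 fyl2xp1 instruction is
   accurate; beyond it, 1 is added explicitly and fyl2x is used.  The
   callback names are placeholders for the two x87 sequences.  */
#if 0
static long double
sketch_log1p_split (long double x,
                    long double (*do_fyl2xp1) (long double),
                    long double (*do_fyl2x) (long double))
{
  const long double cutoff = 0.29289321881345247561810596348408353L;
  long double ax = x < 0 ? -x : x;     /* fabs (op1) */
  if (ax < cutoff)
    return do_fyl2xp1 (x);             /* ln2 * log2 (1 + x), |x| small */
  return do_fyl2x (1.0L + x);          /* ln2 * log2 (1 + x) otherwise */
}
#endif
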
47685 /* Emit x87 code for a round-to-nearest (halfway cases away from zero) calculation from OP1 into OP0. */
47686 void ix86_emit_i387_round (rtx op0, rtx op1)
47687 {
47688 machine_mode inmode = GET_MODE (op1);
47689 machine_mode outmode = GET_MODE (op0);
47690 rtx e1, e2, res, tmp, tmp1, half;
47691 rtx scratch = gen_reg_rtx (HImode);
47692 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
47693 rtx_code_label *jump_label = gen_label_rtx ();
47694 rtx insn;
47695 rtx (*gen_abs) (rtx, rtx);
47696 rtx (*gen_neg) (rtx, rtx);
47697
47698 switch (inmode)
47699 {
47700 case SFmode:
47701 gen_abs = gen_abssf2;
47702 break;
47703 case DFmode:
47704 gen_abs = gen_absdf2;
47705 break;
47706 case XFmode:
47707 gen_abs = gen_absxf2;
47708 break;
47709 default:
47710 gcc_unreachable ();
47711 }
47712
47713 switch (outmode)
47714 {
47715 case SFmode:
47716 gen_neg = gen_negsf2;
47717 break;
47718 case DFmode:
47719 gen_neg = gen_negdf2;
47720 break;
47721 case XFmode:
47722 gen_neg = gen_negxf2;
47723 break;
47724 case HImode:
47725 gen_neg = gen_neghi2;
47726 break;
47727 case SImode:
47728 gen_neg = gen_negsi2;
47729 break;
47730 case DImode:
47731 gen_neg = gen_negdi2;
47732 break;
47733 default:
47734 gcc_unreachable ();
47735 }
47736
47737 e1 = gen_reg_rtx (inmode);
47738 e2 = gen_reg_rtx (inmode);
47739 res = gen_reg_rtx (outmode);
47740
47741 half = const_double_from_real_value (dconsthalf, inmode);
47742
47743 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
47744
47745 /* scratch = fxam(op1) */
47746 emit_insn (gen_rtx_SET (scratch,
47747 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
47748 UNSPEC_FXAM)));
47749 /* e1 = fabs(op1) */
47750 emit_insn (gen_abs (e1, op1));
47751
47752 /* e2 = e1 + 0.5 */
47753 half = force_reg (inmode, half);
47754 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
47755
47756 /* res = floor(e2) */
47757 if (inmode != XFmode)
47758 {
47759 tmp1 = gen_reg_rtx (XFmode);
47760
47761 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
47762 }
47763 else
47764 tmp1 = e2;
47765
47766 switch (outmode)
47767 {
47768 case SFmode:
47769 case DFmode:
47770 {
47771 rtx tmp0 = gen_reg_rtx (XFmode);
47772
47773 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
47774
47775 emit_insn (gen_rtx_SET (res,
47776 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
47777 UNSPEC_TRUNC_NOOP)));
47778 }
47779 break;
47780 case XFmode:
47781 emit_insn (gen_frndintxf2_floor (res, tmp1));
47782 break;
47783 case HImode:
47784 emit_insn (gen_lfloorxfhi2 (res, tmp1));
47785 break;
47786 case SImode:
47787 emit_insn (gen_lfloorxfsi2 (res, tmp1));
47788 break;
47789 case DImode:
47790 emit_insn (gen_lfloorxfdi2 (res, tmp1));
47791 break;
47792 default:
47793 gcc_unreachable ();
47794 }
47795
47796 /* flags = signbit(a) */
47797 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
47798
47799 /* if (flags) then res = -res */
47800 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
47801 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
47802 gen_rtx_LABEL_REF (VOIDmode, jump_label),
47803 pc_rtx);
47804 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
47805 predict_jump (REG_BR_PROB_BASE * 50 / 100);
47806 JUMP_LABEL (insn) = jump_label;
47807
47808 emit_insn (gen_neg (res, res));
47809
47810 emit_label (jump_label);
47811 LABEL_NUSES (jump_label) = 1;
47812
47813 emit_move_insn (op0, res);
47814 }
47815
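/* Illustrative sketch (hypothetical, kept under #if 0): the sign-magnitude
   rounding the sequence above performs, round (a) = sgn (a) * floor
   (fabs (a) + 0.5).  The real code reads the sign bit from fxam so that
   -0.0 is handled; the simple a < 0 test below is only an approximation.  */
#if 0
static double
sketch_i387_round (double a)
{
  double mag = __builtin_fabs (a) + 0.5;   /* e2 = fabs (a) + 0.5 */
  double res = __builtin_floor (mag);      /* res = floor (e2) */
  return a < 0 ? -res : res;               /* negate if sign bit set */
}
#endif
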
47816 /* Output code to perform a Newton-Raphson approximation of a single precision
47817 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
47818
47819 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
47820 {
47821 rtx x0, x1, e0, e1;
47822
47823 x0 = gen_reg_rtx (mode);
47824 e0 = gen_reg_rtx (mode);
47825 e1 = gen_reg_rtx (mode);
47826 x1 = gen_reg_rtx (mode);
47827
47828 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
47829
47830 b = force_reg (mode, b);
47831
47832 /* x0 = rcp(b) estimate */
47833 if (mode == V16SFmode || mode == V8DFmode)
47834 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47835 UNSPEC_RCP14)));
47836 else
47837 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47838 UNSPEC_RCP)));
47839
47840 /* e0 = x0 * b */
47841 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
47842
47843 /* e0 = x0 * e0 */
47844 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
47845
47846 /* e1 = x0 + x0 */
47847 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
47848
47849 /* x1 = e1 - e0 */
47850 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
47851
47852 /* res = a * x1 */
47853 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
47854 }
47855
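/* Illustrative sketch (hypothetical, kept under #if 0): the refinement the
   sequence above emits, a / b ~= a * (2*x0 - b*x0*x0), where X0 is the
   rcpss/rcp14 estimate of 1/b and is taken as a parameter here.  */
#if 0
static float
sketch_swdiv (float a, float b, float x0)
{
  float e0 = x0 * b;       /* e0 = x0 * b */
  e0 = x0 * e0;            /* e0 = b * x0 * x0 */
  float e1 = x0 + x0;      /* e1 = 2 * x0 */
  float x1 = e1 - e0;      /* x1 = 2*x0 - b*x0*x0 */
  return a * x1;           /* res = a * x1 */
}
#endif
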
47856 /* Output code to perform a Newton-Raphson approximation of a
47857 single precision floating point [reciprocal] square root. */
47858
47859 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
47860 {
47861 rtx x0, e0, e1, e2, e3, mthree, mhalf;
47862 REAL_VALUE_TYPE r;
47863 int unspec;
47864
47865 x0 = gen_reg_rtx (mode);
47866 e0 = gen_reg_rtx (mode);
47867 e1 = gen_reg_rtx (mode);
47868 e2 = gen_reg_rtx (mode);
47869 e3 = gen_reg_rtx (mode);
47870
47871 real_from_integer (&r, VOIDmode, -3, SIGNED);
47872 mthree = const_double_from_real_value (r, SFmode);
47873
47874 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
47875 mhalf = const_double_from_real_value (r, SFmode);
47876 unspec = UNSPEC_RSQRT;
47877
47878 if (VECTOR_MODE_P (mode))
47879 {
47880 mthree = ix86_build_const_vector (mode, true, mthree);
47881 mhalf = ix86_build_const_vector (mode, true, mhalf);
47882 /* There is no 512-bit rsqrt. There is however rsqrt14. */
47883 if (GET_MODE_SIZE (mode) == 64)
47884 unspec = UNSPEC_RSQRT14;
47885 }
47886
47887 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
47888 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
47889
47890 a = force_reg (mode, a);
47891
47892 /* x0 = rsqrt(a) estimate */
47893 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
47894 unspec)));
47895
47896 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0). */
47897 if (!recip)
47898 {
47899 rtx zero = force_reg (mode, CONST0_RTX(mode));
47900 rtx mask;
47901
47902 /* Handle masked compare. */
47903 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
47904 {
47905 mask = gen_reg_rtx (HImode);
47906 /* Imm value 0x4 corresponds to not-equal comparison. */
47907 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
47908 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
47909 }
47910 else
47911 {
47912 mask = gen_reg_rtx (mode);
47913 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
47914 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
47915 }
47916 }
47917
47918 /* e0 = x0 * a */
47919 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
47920 /* e1 = e0 * x0 */
47921 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
47922
47923 /* e2 = e1 - 3. */
47924 mthree = force_reg (mode, mthree);
47925 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
47926
47927 mhalf = force_reg (mode, mhalf);
47928 if (recip)
47929 /* e3 = -.5 * x0 */
47930 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
47931 else
47932 /* e3 = -.5 * e0 */
47933 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
47934 /* ret = e2 * e3 */
47935 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
47936 }
47937
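/* Illustrative sketch (hypothetical, kept under #if 0): the Newton-Raphson
   step the sequence above emits, starting from the rsqrtss/rsqrt14
   estimate X0 ~= 1/sqrt(a).  The zero check mirrors the mask above that
   avoids 0 * inf = NaN for sqrt (0.0).  */
#if 0
static float
sketch_swsqrt (float a, float x0, int recip)
{
  if (!recip && a == 0.0f)
    x0 = 0.0f;                        /* filter out the infinite estimate */
  float e0 = x0 * a;                  /* e0 = a * x0 */
  float e1 = e0 * x0;                 /* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;               /* e2 = a*x0*x0 - 3 */
  float e3 = (recip ? x0 : e0) * -0.5f;
  return e2 * e3;                     /* sqrt or rsqrt approximation */
}
#endif
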
47938 #ifdef TARGET_SOLARIS
47939 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
47940
47941 static void
47942 i386_solaris_elf_named_section (const char *name, unsigned int flags,
47943 tree decl)
47944 {
47945 /* With Binutils 2.15, the "@unwind" marker must be specified on
47946 every occurrence of the ".eh_frame" section, not just the first
47947 one. */
47948 if (TARGET_64BIT
47949 && strcmp (name, ".eh_frame") == 0)
47950 {
47951 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
47952 flags & SECTION_WRITE ? "aw" : "a");
47953 return;
47954 }
47955
47956 #ifndef USE_GAS
47957 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
47958 {
47959 solaris_elf_asm_comdat_section (name, flags, decl);
47960 return;
47961 }
47962 #endif
47963
47964 default_elf_asm_named_section (name, flags, decl);
47965 }
47966 #endif /* TARGET_SOLARIS */
47967
47968 /* Return the mangling of TYPE if it is an extended fundamental type. */
47969
47970 static const char *
47971 ix86_mangle_type (const_tree type)
47972 {
47973 type = TYPE_MAIN_VARIANT (type);
47974
47975 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
47976 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
47977 return NULL;
47978
47979 switch (TYPE_MODE (type))
47980 {
47981 case TFmode:
47982 /* __float128 is "g". */
47983 return "g";
47984 case XFmode:
47985 /* "long double" or __float80 is "e". */
47986 return "e";
47987 default:
47988 return NULL;
47989 }
47990 }
47991
47992 /* For 32-bit code we can save PIC register setup by using
47993 __stack_chk_fail_local hidden function instead of calling
47994 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
47995 register, so it is better to call __stack_chk_fail directly. */
47996
47997 static tree ATTRIBUTE_UNUSED
47998 ix86_stack_protect_fail (void)
47999 {
48000 return TARGET_64BIT
48001 ? default_external_stack_protect_fail ()
48002 : default_hidden_stack_protect_fail ();
48003 }
48004
48005 /* Select a format to encode pointers in exception handling data. CODE
48006 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
48007 true if the symbol may be affected by dynamic relocations.
48008
48009 ??? All x86 object file formats are capable of representing this.
48010 After all, the relocation needed is the same as for the call insn.
48011 Whether or not a particular assembler allows us to enter such, I
48012 guess we'll have to see. */
48013 int
48014 asm_preferred_eh_data_format (int code, int global)
48015 {
48016 if (flag_pic)
48017 {
48018 int type = DW_EH_PE_sdata8;
48019 if (!TARGET_64BIT
48020 || ix86_cmodel == CM_SMALL_PIC
48021 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
48022 type = DW_EH_PE_sdata4;
48023 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
48024 }
48025 if (ix86_cmodel == CM_SMALL
48026 || (ix86_cmodel == CM_MEDIUM && code))
48027 return DW_EH_PE_udata4;
48028 return DW_EH_PE_absptr;
48029 }
48030 \f
48031 /* Expand copysign from SIGN to the positive value ABS_VALUE,
48032 storing the result in RESULT. If MASK is non-null, it shall be a mask
48033 that masks out the sign bit. */
48034 static void
48035 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
48036 {
48037 machine_mode mode = GET_MODE (sign);
48038 rtx sgn = gen_reg_rtx (mode);
48039 if (mask == NULL_RTX)
48040 {
48041 machine_mode vmode;
48042
48043 if (mode == SFmode)
48044 vmode = V4SFmode;
48045 else if (mode == DFmode)
48046 vmode = V2DFmode;
48047 else
48048 vmode = mode;
48049
48050 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
48051 if (!VECTOR_MODE_P (mode))
48052 {
48053 /* We need to generate a scalar mode mask in this case. */
48054 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48055 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48056 mask = gen_reg_rtx (mode);
48057 emit_insn (gen_rtx_SET (mask, tmp));
48058 }
48059 }
48060 else
48061 mask = gen_rtx_NOT (mode, mask);
48062 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
48063 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
48064 }
48065
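/* Illustrative sketch (hypothetical, kept under #if 0): the bitwise
   copysign above.  ABS_VALUE is assumed to have a clear sign bit, so
   OR-ing in the sign bit extracted from SIGN implements copysign without
   branches; the emitted code does this with AND/OR on SSE registers.  */
#if 0
static double
sketch_sse_copysign_to_positive (double abs_value, double sign)
{
  unsigned long long a, s;
  const unsigned long long signbit = 0x8000000000000000ULL;
  __builtin_memcpy (&a, &abs_value, sizeof a);
  __builtin_memcpy (&s, &sign, sizeof s);
  a |= s & signbit;                  /* result = abs_value | (sign & mask) */
  __builtin_memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}
#endif
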
48066 /* Expand fabs (OP0) and return a new rtx that holds the result. The
48067 mask for masking out the sign-bit is stored in *SMASK, if that is
48068 non-null. */
48069 static rtx
48070 ix86_expand_sse_fabs (rtx op0, rtx *smask)
48071 {
48072 machine_mode vmode, mode = GET_MODE (op0);
48073 rtx xa, mask;
48074
48075 xa = gen_reg_rtx (mode);
48076 if (mode == SFmode)
48077 vmode = V4SFmode;
48078 else if (mode == DFmode)
48079 vmode = V2DFmode;
48080 else
48081 vmode = mode;
48082 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
48083 if (!VECTOR_MODE_P (mode))
48084 {
48085 /* We need to generate a scalar mode mask in this case. */
48086 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48087 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48088 mask = gen_reg_rtx (mode);
48089 emit_insn (gen_rtx_SET (mask, tmp));
48090 }
48091 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
48092
48093 if (smask)
48094 *smask = mask;
48095
48096 return xa;
48097 }
48098
48099 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
48100 swapping the operands if SWAP_OPERANDS is true. The expanded
48101 code is a forward jump to a newly created label in case the
48102 comparison is true. The generated label rtx is returned. */
48103 static rtx_code_label *
48104 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
48105 bool swap_operands)
48106 {
48107 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
48108 rtx_code_label *label;
48109 rtx tmp;
48110
48111 if (swap_operands)
48112 std::swap (op0, op1);
48113
48114 label = gen_label_rtx ();
48115 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
48116 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
48117 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
48118 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
48119 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
48120 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
48121 JUMP_LABEL (tmp) = label;
48122
48123 return label;
48124 }
48125
48126 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
48127 using comparison code CODE. Operands are swapped for the comparison if
48128 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
48129 static rtx
48130 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
48131 bool swap_operands)
48132 {
48133 rtx (*insn)(rtx, rtx, rtx, rtx);
48134 machine_mode mode = GET_MODE (op0);
48135 rtx mask = gen_reg_rtx (mode);
48136
48137 if (swap_operands)
48138 std::swap (op0, op1);
48139
48140 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
48141
48142 emit_insn (insn (mask, op0, op1,
48143 gen_rtx_fmt_ee (code, mode, op0, op1)));
48144 return mask;
48145 }
48146
48147 /* Generate and return a rtx of mode MODE for 2**n where n is the number
48148 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
48149 static rtx
48150 ix86_gen_TWO52 (machine_mode mode)
48151 {
48152 REAL_VALUE_TYPE TWO52r;
48153 rtx TWO52;
48154
48155 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
48156 TWO52 = const_double_from_real_value (TWO52r, mode);
48157 TWO52 = force_reg (mode, TWO52);
48158
48159 return TWO52;
48160 }
48161
48162 /* Expand SSE sequence for computing lround from OP1 storing
48163 into OP0. */
48164 void
48165 ix86_expand_lround (rtx op0, rtx op1)
48166 {
48167 /* C code for the stuff we're doing below:
48168 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
48169 return (long)tmp;
48170 */
48171 machine_mode mode = GET_MODE (op1);
48172 const struct real_format *fmt;
48173 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48174 rtx adj;
48175
48176 /* load nextafter (0.5, 0.0) */
48177 fmt = REAL_MODE_FORMAT (mode);
48178 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48179 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48180
48181 /* adj = copysign (0.5, op1) */
48182 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
48183 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
48184
48185 /* adj = op1 + adj */
48186 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
48187
48188 /* op0 = (imode)adj */
48189 expand_fix (op0, adj, 0);
48190 }
48191
48192 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
48193 into OP0. */
48194 void
48195 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
48196 {
48197 /* C code for the stuff we're doing below (for do_floor):
48198 xi = (long)op1;
48199 xi -= (double)xi > op1 ? 1 : 0;
48200 return xi;
48201 */
48202 machine_mode fmode = GET_MODE (op1);
48203 machine_mode imode = GET_MODE (op0);
48204 rtx ireg, freg, tmp;
48205 rtx_code_label *label;
48206
48207 /* reg = (long)op1 */
48208 ireg = gen_reg_rtx (imode);
48209 expand_fix (ireg, op1, 0);
48210
48211 /* freg = (double)reg */
48212 freg = gen_reg_rtx (fmode);
48213 expand_float (freg, ireg, 0);
48214
48215 /* ireg = (freg > op1) ? ireg - 1 : ireg */
48216 label = ix86_expand_sse_compare_and_jump (UNLE,
48217 freg, op1, !do_floor);
48218 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
48219 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
48220 emit_move_insn (ireg, tmp);
48221
48222 emit_label (label);
48223 LABEL_NUSES (label) = 1;
48224
48225 emit_move_insn (op0, ireg);
48226 }
48227
48228 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
48229 result in OPERAND0. */
48230 void
48231 ix86_expand_rint (rtx operand0, rtx operand1)
48232 {
48233 /* C code for the stuff we're doing below:
48234 xa = fabs (operand1);
48235 if (!isless (xa, 2**52))
48236 return operand1;
48237 xa = xa + 2**52 - 2**52;
48238 return copysign (xa, operand1);
48239 */
48240 machine_mode mode = GET_MODE (operand0);
48241 rtx res, xa, TWO52, mask;
48242 rtx_code_label *label;
48243
48244 res = gen_reg_rtx (mode);
48245 emit_move_insn (res, operand1);
48246
48247 /* xa = abs (operand1) */
48248 xa = ix86_expand_sse_fabs (res, &mask);
48249
48250 /* if (!isless (xa, TWO52)) goto label; */
48251 TWO52 = ix86_gen_TWO52 (mode);
48252 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48253
48254 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48255 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
48256
48257 ix86_sse_copysign_to_positive (res, xa, res, mask);
48258
48259 emit_label (label);
48260 LABEL_NUSES (label) = 1;
48261
48262 emit_move_insn (operand0, res);
48263 }
48264
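/* Illustrative sketch (hypothetical, kept under #if 0): the 2**52 trick
   used above.  For fabs (x) < 2**52 a double has no fraction bits left
   after adding 2**52, so the addition rounds to an integer in the current
   (nearest-even) rounding mode and subtracting 2**52 recovers it.  The
   sketch assumes the two operations are not re-associated away.  */
#if 0
static double
sketch_rint_via_two52 (double x)
{
  const double two52 = 4503599627370496.0;        /* 2**52 */
  double xa = __builtin_fabs (x);
  if (!(xa < two52))                              /* !isless: big or NaN */
    return x;
  xa = (xa + two52) - two52;                      /* round to nearest int */
  return __builtin_copysign (xa, x);              /* restore the sign */
}
#endif
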
48265 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48266 into OPERAND0. */
48267 void
48268 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
48269 {
48270 /* C code for the stuff we expand below.
48271 double xa = fabs (x), x2;
48272 if (!isless (xa, TWO52))
48273 return x;
48274 xa = xa + TWO52 - TWO52;
48275 x2 = copysign (xa, x);
48276 Compensate. Floor:
48277 if (x2 > x)
48278 x2 -= 1;
48279 Compensate. Ceil:
48280 if (x2 < x)
48281 x2 -= -1;
48282 return x2;
48283 */
48284 machine_mode mode = GET_MODE (operand0);
48285 rtx xa, TWO52, tmp, one, res, mask;
48286 rtx_code_label *label;
48287
48288 TWO52 = ix86_gen_TWO52 (mode);
48289
48290 /* Temporary for holding the result, initialized to the input
48291 operand to ease control flow. */
48292 res = gen_reg_rtx (mode);
48293 emit_move_insn (res, operand1);
48294
48295 /* xa = abs (operand1) */
48296 xa = ix86_expand_sse_fabs (res, &mask);
48297
48298 /* if (!isless (xa, TWO52)) goto label; */
48299 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48300
48301 /* xa = xa + TWO52 - TWO52; */
48302 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48303 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
48304
48305 /* xa = copysign (xa, operand1) */
48306 ix86_sse_copysign_to_positive (xa, xa, res, mask);
48307
48308 /* generate 1.0 or -1.0 */
48309 one = force_reg (mode,
48310 const_double_from_real_value (do_floor
48311 ? dconst1 : dconstm1, mode));
48312
48313 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
48314 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48315 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48316 /* We always need to subtract here to preserve signed zero. */
48317 tmp = expand_simple_binop (mode, MINUS,
48318 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48319 emit_move_insn (res, tmp);
48320
48321 emit_label (label);
48322 LABEL_NUSES (label) = 1;
48323
48324 emit_move_insn (operand0, res);
48325 }
48326
48327 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48328 into OPERAND0. */
48329 void
48330 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
48331 {
48332 /* C code for the stuff we expand below.
48333 double xa = fabs (x), x2;
48334 if (!isless (xa, TWO52))
48335 return x;
48336 x2 = (double)(long)x;
48337 Compensate. Floor:
48338 if (x2 > x)
48339 x2 -= 1;
48340 Compensate. Ceil:
48341 if (x2 < x)
48342 x2 += 1;
48343 if (HONOR_SIGNED_ZEROS (mode))
48344 return copysign (x2, x);
48345 return x2;
48346 */
48347 machine_mode mode = GET_MODE (operand0);
48348 rtx xa, xi, TWO52, tmp, one, res, mask;
48349 rtx_code_label *label;
48350
48351 TWO52 = ix86_gen_TWO52 (mode);
48352
48353 /* Temporary for holding the result, initialized to the input
48354 operand to ease control flow. */
48355 res = gen_reg_rtx (mode);
48356 emit_move_insn (res, operand1);
48357
48358 /* xa = abs (operand1) */
48359 xa = ix86_expand_sse_fabs (res, &mask);
48360
48361 /* if (!isless (xa, TWO52)) goto label; */
48362 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48363
48364 /* xa = (double)(long)x */
48365 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48366 expand_fix (xi, res, 0);
48367 expand_float (xa, xi, 0);
48368
48369 /* generate 1.0 */
48370 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48371
48372 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
48373 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48374 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48375 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
48376 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48377 emit_move_insn (res, tmp);
48378
48379 if (HONOR_SIGNED_ZEROS (mode))
48380 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48381
48382 emit_label (label);
48383 LABEL_NUSES (label) = 1;
48384
48385 emit_move_insn (operand0, res);
48386 }
48387
48388 /* Expand SSE sequence for computing round from OPERAND1 storing
48389 into OPERAND0. This sequence works without relying on DImode truncation
48390 via cvttsd2siq, which is only available on 64-bit targets. */
48391 void
48392 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
48393 {
48394 /* C code for the stuff we expand below.
48395 double xa = fabs (x), xa2, x2;
48396 if (!isless (xa, TWO52))
48397 return x;
48398 Using the absolute value and copying back sign makes
48399 -0.0 -> -0.0 correct.
48400 xa2 = xa + TWO52 - TWO52;
48401 Compensate.
48402 dxa = xa2 - xa;
48403 if (dxa <= -0.5)
48404 xa2 += 1;
48405 else if (dxa > 0.5)
48406 xa2 -= 1;
48407 x2 = copysign (xa2, x);
48408 return x2;
48409 */
48410 machine_mode mode = GET_MODE (operand0);
48411 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
48412 rtx_code_label *label;
48413
48414 TWO52 = ix86_gen_TWO52 (mode);
48415
48416 /* Temporary for holding the result, initialized to the input
48417 operand to ease control flow. */
48418 res = gen_reg_rtx (mode);
48419 emit_move_insn (res, operand1);
48420
48421 /* xa = abs (operand1) */
48422 xa = ix86_expand_sse_fabs (res, &mask);
48423
48424 /* if (!isless (xa, TWO52)) goto label; */
48425 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48426
48427 /* xa2 = xa + TWO52 - TWO52; */
48428 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48429 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
48430
48431 /* dxa = xa2 - xa; */
48432 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
48433
48434 /* generate 0.5, 1.0 and -0.5 */
48435 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
48436 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
48437 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
48438 0, OPTAB_DIRECT);
48439
48440 /* Compensate. */
48441 tmp = gen_reg_rtx (mode);
48442 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
48443 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
48444 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48445 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48446 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
48447 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
48448 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48449 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48450
48451 /* res = copysign (xa2, operand1) */
48452 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
48453
48454 emit_label (label);
48455 LABEL_NUSES (label) = 1;
48456
48457 emit_move_insn (operand0, res);
48458 }
48459
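/* Illustrative sketch (hypothetical, kept under #if 0): the compensation
   above turns the nearest-even result of the 2**52 trick into
   round-half-away-from-zero.  E.g. for xa = 2.5 the trick yields
   xa2 = 2.0, so dxa = -0.5 and 1 is added back.  */
#if 0
static double
sketch_round_compensate (double xa)   /* xa = fabs (x), assumed < 2**52 */
{
  const double two52 = 4503599627370496.0;
  double xa2 = (xa + two52) - two52;  /* nearest-even integer */
  double dxa = xa2 - xa;
  if (dxa > 0.5)
    xa2 -= 1.0;                       /* was rounded up across the half */
  else if (dxa <= -0.5)
    xa2 += 1.0;                       /* was rounded down to/past the half */
  return xa2;
}
#endif
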
48460 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48461 into OPERAND0. */
48462 void
48463 ix86_expand_trunc (rtx operand0, rtx operand1)
48464 {
48465 /* C code for SSE variant we expand below.
48466 double xa = fabs (x), x2;
48467 if (!isless (xa, TWO52))
48468 return x;
48469 x2 = (double)(long)x;
48470 if (HONOR_SIGNED_ZEROS (mode))
48471 return copysign (x2, x);
48472 return x2;
48473 */
48474 machine_mode mode = GET_MODE (operand0);
48475 rtx xa, xi, TWO52, res, mask;
48476 rtx_code_label *label;
48477
48478 TWO52 = ix86_gen_TWO52 (mode);
48479
48480 /* Temporary for holding the result, initialized to the input
48481 operand to ease control flow. */
48482 res = gen_reg_rtx (mode);
48483 emit_move_insn (res, operand1);
48484
48485 /* xa = abs (operand1) */
48486 xa = ix86_expand_sse_fabs (res, &mask);
48487
48488 /* if (!isless (xa, TWO52)) goto label; */
48489 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48490
48491 /* x = (double)(long)x */
48492 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48493 expand_fix (xi, res, 0);
48494 expand_float (res, xi, 0);
48495
48496 if (HONOR_SIGNED_ZEROS (mode))
48497 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48498
48499 emit_label (label);
48500 LABEL_NUSES (label) = 1;
48501
48502 emit_move_insn (operand0, res);
48503 }
48504
48505 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
48506 OPERAND0, without relying on DImode truncation (for 32-bit targets). */
48507 void
48508 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
48509 {
48510 machine_mode mode = GET_MODE (operand0);
48511 rtx xa, mask, TWO52, one, res, smask, tmp;
48512 rtx_code_label *label;
48513
48514 /* C code for SSE variant we expand below.
48515 double xa = fabs (x), x2;
48516 if (!isless (xa, TWO52))
48517 return x;
48518 xa2 = xa + TWO52 - TWO52;
48519 Compensate:
48520 if (xa2 > xa)
48521 xa2 -= 1.0;
48522 x2 = copysign (xa2, x);
48523 return x2;
48524 */
48525
48526 TWO52 = ix86_gen_TWO52 (mode);
48527
48528 /* Temporary for holding the result, initialized to the input
48529 operand to ease control flow. */
48530 res = gen_reg_rtx (mode);
48531 emit_move_insn (res, operand1);
48532
48533 /* xa = abs (operand1) */
48534 xa = ix86_expand_sse_fabs (res, &smask);
48535
48536 /* if (!isless (xa, TWO52)) goto label; */
48537 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48538
48539 /* res = xa + TWO52 - TWO52; */
48540 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48541 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
48542 emit_move_insn (res, tmp);
48543
48544 /* generate 1.0 */
48545 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48546
48547 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
48548 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
48549 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
48550 tmp = expand_simple_binop (mode, MINUS,
48551 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
48552 emit_move_insn (res, tmp);
48553
48554 /* res = copysign (res, operand1) */
48555 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
48556
48557 emit_label (label);
48558 LABEL_NUSES (label) = 1;
48559
48560 emit_move_insn (operand0, res);
48561 }
48562
48563 /* Expand SSE sequence for computing round from OPERAND1 storing
48564 into OPERAND0. */
48565 void
48566 ix86_expand_round (rtx operand0, rtx operand1)
48567 {
48568 /* C code for the stuff we're doing below:
48569 double xa = fabs (x);
48570 if (!isless (xa, TWO52))
48571 return x;
48572 xa = (double)(long)(xa + nextafter (0.5, 0.0));
48573 return copysign (xa, x);
48574 */
48575 machine_mode mode = GET_MODE (operand0);
48576 rtx res, TWO52, xa, xi, half, mask;
48577 rtx_code_label *label;
48578 const struct real_format *fmt;
48579 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48580
48581 /* Temporary for holding the result, initialized to the input
48582 operand to ease control flow. */
48583 res = gen_reg_rtx (mode);
48584 emit_move_insn (res, operand1);
48585
48586 TWO52 = ix86_gen_TWO52 (mode);
48587 xa = ix86_expand_sse_fabs (res, &mask);
48588 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48589
48590 /* load nextafter (0.5, 0.0) */
48591 fmt = REAL_MODE_FORMAT (mode);
48592 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48593 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48594
48595 /* xa = xa + 0.5 */
48596 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
48597 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
48598
48599 /* xa = (double)(int64_t)xa */
48600 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48601 expand_fix (xi, xa, 0);
48602 expand_float (xa, xi, 0);
48603
48604 /* res = copysign (xa, operand1) */
48605 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
48606
48607 emit_label (label);
48608 LABEL_NUSES (label) = 1;
48609
48610 emit_move_insn (operand0, res);
48611 }
48612
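/* Illustrative sketch (hypothetical, kept under #if 0): why the sequences
   above add nextafter (0.5, 0.0) instead of 0.5.  For the largest double
   below 0.5, adding exactly 0.5 rounds up to 1.0, so truncation would
   return 1 even though round() of that value is 0; adding the predecessor
   of 0.5 avoids this while leaving all other cases unchanged.  */
#if 0
static double
sketch_round_via_trunc (double x)
{
  double pred_half = __builtin_nextafter (0.5, 0.0); /* 0.5 - one ulp */
  double adj = __builtin_copysign (pred_half, x);    /* match sign of x */
  return __builtin_trunc (x + adj);                  /* truncate toward 0 */
}
#endif
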
48613 /* Expand SSE sequence for computing round
48614 from OP1 storing into OP0 using the SSE4.1 round insn. */
48615 void
48616 ix86_expand_round_sse4 (rtx op0, rtx op1)
48617 {
48618 machine_mode mode = GET_MODE (op0);
48619 rtx e1, e2, res, half;
48620 const struct real_format *fmt;
48621 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48622 rtx (*gen_copysign) (rtx, rtx, rtx);
48623 rtx (*gen_round) (rtx, rtx, rtx);
48624
48625 switch (mode)
48626 {
48627 case SFmode:
48628 gen_copysign = gen_copysignsf3;
48629 gen_round = gen_sse4_1_roundsf2;
48630 break;
48631 case DFmode:
48632 gen_copysign = gen_copysigndf3;
48633 gen_round = gen_sse4_1_rounddf2;
48634 break;
48635 default:
48636 gcc_unreachable ();
48637 }
48638
48639 /* round (a) = trunc (a + copysign (0.5, a)) */
48640
48641 /* load nextafter (0.5, 0.0) */
48642 fmt = REAL_MODE_FORMAT (mode);
48643 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48644 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48645 half = const_double_from_real_value (pred_half, mode);
48646
48647 /* e1 = copysign (0.5, op1) */
48648 e1 = gen_reg_rtx (mode);
48649 emit_insn (gen_copysign (e1, half, op1));
48650
48651 /* e2 = op1 + e1 */
48652 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
48653
48654 /* res = trunc (e2) */
48655 res = gen_reg_rtx (mode);
48656 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
48657
48658 emit_move_insn (op0, res);
48659 }
48660 \f
48661
48662 /* Table of valid machine attributes. */
48663 static const struct attribute_spec ix86_attribute_table[] =
48664 {
48665 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
48666 affects_type_identity } */
48667 /* Stdcall attribute says callee is responsible for popping arguments
48668 if they are not variable. */
48669 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48670 true },
48671 /* Fastcall attribute says callee is responsible for popping arguments
48672 if they are not variable. */
48673 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48674 true },
48675 /* Thiscall attribute says callee is responsible for popping arguments
48676 if they are not variable. */
48677 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48678 true },
48679 /* Cdecl attribute says the callee is a normal C declaration */
48680 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48681 true },
48682 /* Regparm attribute specifies how many integer arguments are to be
48683 passed in registers. */
48684 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
48685 true },
48686 /* Sseregparm attribute says we are using x86_64 calling conventions
48687 for FP arguments. */
48688 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48689 true },
48690 /* The transactional memory builtins are implicitly regparm or fastcall
48691 depending on the ABI. Override the generic do-nothing attribute that
48692 these builtins were declared with. */
48693 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
48694 true },
48695 /* force_align_arg_pointer says this function realigns the stack at entry. */
48696 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
48697 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
48698 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
48699 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
48700 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
48701 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
48702 false },
48703 #endif
48704 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48705 false },
48706 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48707 false },
48708 #ifdef SUBTARGET_ATTRIBUTE_TABLE
48709 SUBTARGET_ATTRIBUTE_TABLE,
48710 #endif
48711 /* ms_abi and sysv_abi calling convention function attributes. */
48712 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48713 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48714 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
48715 false },
48716 { "callee_pop_aggregate_return", 1, 1, false, true, true,
48717 ix86_handle_callee_pop_aggregate_return, true },
48718 /* End element. */
48719 { NULL, 0, 0, false, false, false, NULL, false }
48720 };
48721
48722 /* Implement targetm.vectorize.builtin_vectorization_cost. */
48723 static int
48724 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
48725 tree vectype, int)
48726 {
48727 unsigned elements;
48728
48729 switch (type_of_cost)
48730 {
48731 case scalar_stmt:
48732 return ix86_cost->scalar_stmt_cost;
48733
48734 case scalar_load:
48735 return ix86_cost->scalar_load_cost;
48736
48737 case scalar_store:
48738 return ix86_cost->scalar_store_cost;
48739
48740 case vector_stmt:
48741 return ix86_cost->vec_stmt_cost;
48742
48743 case vector_load:
48744 return ix86_cost->vec_align_load_cost;
48745
48746 case vector_store:
48747 return ix86_cost->vec_store_cost;
48748
48749 case vec_to_scalar:
48750 return ix86_cost->vec_to_scalar_cost;
48751
48752 case scalar_to_vec:
48753 return ix86_cost->scalar_to_vec_cost;
48754
48755 case unaligned_load:
48756 case unaligned_store:
48757 return ix86_cost->vec_unalign_load_cost;
48758
48759 case cond_branch_taken:
48760 return ix86_cost->cond_taken_branch_cost;
48761
48762 case cond_branch_not_taken:
48763 return ix86_cost->cond_not_taken_branch_cost;
48764
48765 case vec_perm:
48766 case vec_promote_demote:
48767 return ix86_cost->vec_stmt_cost;
48768
48769 case vec_construct:
48770 elements = TYPE_VECTOR_SUBPARTS (vectype);
48771 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
48772
48773 default:
48774 gcc_unreachable ();
48775 }
48776 }
48777
48778 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
48779 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
48780 insn every time. */
48781
48782 static GTY(()) rtx_insn *vselect_insn;
48783
48784 /* Initialize vselect_insn. */
48785
48786 static void
48787 init_vselect_insn (void)
48788 {
48789 unsigned i;
48790 rtx x;
48791
48792 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
48793 for (i = 0; i < MAX_VECT_LEN; ++i)
48794 XVECEXP (x, 0, i) = const0_rtx;
48795 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
48796 const0_rtx), x);
48797 x = gen_rtx_SET (const0_rtx, x);
48798 start_sequence ();
48799 vselect_insn = emit_insn (x);
48800 end_sequence ();
48801 }
48802
48803 /* Construct (set target (vec_select op0 (parallel perm))) and
48804 return true if that's a valid instruction in the active ISA. */
48805
48806 static bool
48807 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
48808 unsigned nelt, bool testing_p)
48809 {
48810 unsigned int i;
48811 rtx x, save_vconcat;
48812 int icode;
48813
48814 if (vselect_insn == NULL_RTX)
48815 init_vselect_insn ();
48816
48817 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
48818 PUT_NUM_ELEM (XVEC (x, 0), nelt);
48819 for (i = 0; i < nelt; ++i)
48820 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
48821 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48822 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
48823 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
48824 SET_DEST (PATTERN (vselect_insn)) = target;
48825 icode = recog_memoized (vselect_insn);
48826
48827 if (icode >= 0 && !testing_p)
48828 emit_insn (copy_rtx (PATTERN (vselect_insn)));
48829
48830 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
48831 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
48832 INSN_CODE (vselect_insn) = -1;
48833
48834 return icode >= 0;
48835 }
48836
48837 /* Similar, but generate a vec_concat from op0 and op1 as well. */
48838
48839 static bool
48840 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
48841 const unsigned char *perm, unsigned nelt,
48842 bool testing_p)
48843 {
48844 machine_mode v2mode;
48845 rtx x;
48846 bool ok;
48847
48848 if (vselect_insn == NULL_RTX)
48849 init_vselect_insn ();
48850
48851 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
48852 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48853 PUT_MODE (x, v2mode);
48854 XEXP (x, 0) = op0;
48855 XEXP (x, 1) = op1;
48856 ok = expand_vselect (target, x, perm, nelt, testing_p);
48857 XEXP (x, 0) = const0_rtx;
48858 XEXP (x, 1) = const0_rtx;
48859 return ok;
48860 }
48861
48862 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48863 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
48864
48865 static bool
48866 expand_vec_perm_blend (struct expand_vec_perm_d *d)
48867 {
48868 machine_mode mmode, vmode = d->vmode;
48869 unsigned i, mask, nelt = d->nelt;
48870 rtx target, op0, op1, maskop, x;
48871 rtx rperm[32], vperm;
48872
48873 if (d->one_operand_p)
48874 return false;
48875 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
48876 && (TARGET_AVX512BW
48877 || GET_MODE_UNIT_SIZE (vmode) >= 4))
48878 ;
48879 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48880 ;
48881 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48882 ;
48883 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48884 ;
48885 else
48886 return false;
48887
48888 /* This is a blend, not a permute. Elements must stay in their
48889 respective lanes. */
48890 for (i = 0; i < nelt; ++i)
48891 {
48892 unsigned e = d->perm[i];
48893 if (!(e == i || e == i + nelt))
48894 return false;
48895 }
48896
48897 if (d->testing_p)
48898 return true;
48899
48900 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
48901 decision should be extracted elsewhere, so that we only try that
48902 sequence once all budget==3 options have been tried. */
48903 target = d->target;
48904 op0 = d->op0;
48905 op1 = d->op1;
48906 mask = 0;
48907
48908 switch (vmode)
48909 {
48910 case V8DFmode:
48911 case V16SFmode:
48912 case V4DFmode:
48913 case V8SFmode:
48914 case V2DFmode:
48915 case V4SFmode:
48916 case V8HImode:
48917 case V8SImode:
48918 case V32HImode:
48919 case V64QImode:
48920 case V16SImode:
48921 case V8DImode:
48922 for (i = 0; i < nelt; ++i)
48923 mask |= (d->perm[i] >= nelt) << i;
48924 break;
48925
48926 case V2DImode:
48927 for (i = 0; i < 2; ++i)
48928 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
48929 vmode = V8HImode;
48930 goto do_subreg;
48931
48932 case V4SImode:
48933 for (i = 0; i < 4; ++i)
48934 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
48935 vmode = V8HImode;
48936 goto do_subreg;
48937
48938 case V16QImode:
48939 /* See if bytes move in pairs so we can use pblendw with
48940 an immediate argument, rather than pblendvb with a vector
48941 argument. */
48942 for (i = 0; i < 16; i += 2)
48943 if (d->perm[i] + 1 != d->perm[i + 1])
48944 {
48945 use_pblendvb:
48946 for (i = 0; i < nelt; ++i)
48947 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
48948
48949 finish_pblendvb:
48950 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
48951 vperm = force_reg (vmode, vperm);
48952
48953 if (GET_MODE_SIZE (vmode) == 16)
48954 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
48955 else
48956 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
48957 if (target != d->target)
48958 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48959 return true;
48960 }
48961
48962 for (i = 0; i < 8; ++i)
48963 mask |= (d->perm[i * 2] >= 16) << i;
48964 vmode = V8HImode;
48965 /* FALLTHRU */
48966
48967 do_subreg:
48968 target = gen_reg_rtx (vmode);
48969 op0 = gen_lowpart (vmode, op0);
48970 op1 = gen_lowpart (vmode, op1);
48971 break;
48972
48973 case V32QImode:
48974 /* See if bytes move in pairs. If not, vpblendvb must be used. */
48975 for (i = 0; i < 32; i += 2)
48976 if (d->perm[i] + 1 != d->perm[i + 1])
48977 goto use_pblendvb;
48978 /* See if bytes move in quadruplets. If yes, vpblendd
48979 with immediate can be used. */
48980 for (i = 0; i < 32; i += 4)
48981 if (d->perm[i] + 2 != d->perm[i + 2])
48982 break;
48983 if (i < 32)
48984 {
48985 /* See if bytes move the same in both lanes. If yes,
48986 vpblendw with immediate can be used. */
48987 for (i = 0; i < 16; i += 2)
48988 if (d->perm[i] + 16 != d->perm[i + 16])
48989 goto use_pblendvb;
48990
48991 /* Use vpblendw. */
48992 for (i = 0; i < 16; ++i)
48993 mask |= (d->perm[i * 2] >= 32) << i;
48994 vmode = V16HImode;
48995 goto do_subreg;
48996 }
48997
48998 /* Use vpblendd. */
48999 for (i = 0; i < 8; ++i)
49000 mask |= (d->perm[i * 4] >= 32) << i;
49001 vmode = V8SImode;
49002 goto do_subreg;
49003
49004 case V16HImode:
49005 /* See if words move in pairs. If yes, vpblendd can be used. */
49006 for (i = 0; i < 16; i += 2)
49007 if (d->perm[i] + 1 != d->perm[i + 1])
49008 break;
49009 if (i < 16)
49010 {
49011 /* See if words move the same in both lanes. If not,
49012 vpblendvb must be used. */
49013 for (i = 0; i < 8; i++)
49014 if (d->perm[i] + 8 != d->perm[i + 8])
49015 {
49016 /* Use vpblendvb. */
49017 for (i = 0; i < 32; ++i)
49018 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
49019
49020 vmode = V32QImode;
49021 nelt = 32;
49022 target = gen_reg_rtx (vmode);
49023 op0 = gen_lowpart (vmode, op0);
49024 op1 = gen_lowpart (vmode, op1);
49025 goto finish_pblendvb;
49026 }
49027
49028 /* Use vpblendw. */
49029 for (i = 0; i < 16; ++i)
49030 mask |= (d->perm[i] >= 16) << i;
49031 break;
49032 }
49033
49034 /* Use vpblendd. */
49035 for (i = 0; i < 8; ++i)
49036 mask |= (d->perm[i * 2] >= 16) << i;
49037 vmode = V8SImode;
49038 goto do_subreg;
49039
49040 case V4DImode:
49041 /* Use vpblendd. */
49042 for (i = 0; i < 4; ++i)
49043 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
49044 vmode = V8SImode;
49045 goto do_subreg;
49046
49047 default:
49048 gcc_unreachable ();
49049 }
49050
49051 switch (vmode)
49052 {
49053 case V8DFmode:
49054 case V8DImode:
49055 mmode = QImode;
49056 break;
49057 case V16SFmode:
49058 case V16SImode:
49059 mmode = HImode;
49060 break;
49061 case V32HImode:
49062 mmode = SImode;
49063 break;
49064 case V64QImode:
49065 mmode = DImode;
49066 break;
49067 default:
49068 mmode = VOIDmode;
49069 }
49070
49071 if (mmode != VOIDmode)
49072 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
49073 else
49074 maskop = GEN_INT (mask);
49075
49076 /* This matches five different patterns with the different modes. */
49077 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
49078 x = gen_rtx_SET (target, x);
49079 emit_insn (x);
49080 if (target != d->target)
49081 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49082
49083 return true;
49084 }
49085
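/* Illustrative sketch (hypothetical, kept under #if 0): building the
   blendps/blendpd-style immediate used above.  Bit I of the mask selects
   the second operand whenever the permutation takes element I from it
   (perm[i] >= nelt); e.g. on V4SF the perm {0,5,2,7} gives mask 0b1010.  */
#if 0
static unsigned
sketch_blend_immediate (const unsigned char *perm, unsigned nelt)
{
  unsigned mask = 0;
  for (unsigned i = 0; i < nelt; ++i)
    mask |= (unsigned) (perm[i] >= nelt) << i;
  return mask;
}
#endif
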
49086 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49087 in terms of the variable form of vpermilps.
49088
49089 Note that we will have already failed the immediate input vpermilps,
49090 which requires that the high and low part shuffle be identical; the
49091 variable form doesn't require that. */
49092
49093 static bool
49094 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
49095 {
49096 rtx rperm[8], vperm;
49097 unsigned i;
49098
49099 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
49100 return false;
49101
49102 /* We can only permute within the 128-bit lane. */
49103 for (i = 0; i < 8; ++i)
49104 {
49105 unsigned e = d->perm[i];
49106 if (i < 4 ? e >= 4 : e < 4)
49107 return false;
49108 }
49109
49110 if (d->testing_p)
49111 return true;
49112
49113 for (i = 0; i < 8; ++i)
49114 {
49115 unsigned e = d->perm[i];
49116
49117 /* Within each 128-bit lane, the elements of op0 are numbered
49118 from 0 and the elements of op1 are numbered from 4. */
49119 if (e >= 8 + 4)
49120 e -= 8;
49121 else if (e >= 4)
49122 e -= 4;
49123
49124 rperm[i] = GEN_INT (e);
49125 }
49126
49127 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
49128 vperm = force_reg (V8SImode, vperm);
49129 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
49130
49131 return true;
49132 }
49133
49134 /* Return true if permutation D can be performed as VMODE permutation
49135 instead. */
49136
49137 static bool
49138 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
49139 {
49140 unsigned int i, j, chunk;
49141
49142 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
49143 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
49144 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
49145 return false;
49146
49147 if (GET_MODE_NUNITS (vmode) >= d->nelt)
49148 return true;
49149
49150 chunk = d->nelt / GET_MODE_NUNITS (vmode);
49151 for (i = 0; i < d->nelt; i += chunk)
49152 if (d->perm[i] & (chunk - 1))
49153 return false;
49154 else
49155 for (j = 1; j < chunk; ++j)
49156 if (d->perm[i] + j != d->perm[i + j])
49157 return false;
49158
49159 return true;
49160 }
49161
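/* Illustrative sketch (hypothetical, kept under #if 0): the chunk test
   above.  A permutation of NELT narrow elements can be re-expressed on
   wider elements covering CHUNK narrow ones only if every CHUNK-sized
   group starts on a chunk boundary and stays contiguous; e.g. for
   chunk == 2, {2,3,0,1} qualifies (it is {1,0} on pairs) but {1,2,3,0}
   does not.  */
#if 0
static int
sketch_perm_uses_whole_chunks (const unsigned char *perm, unsigned nelt,
                               unsigned chunk)
{
  for (unsigned i = 0; i < nelt; i += chunk)
    {
      if (perm[i] & (chunk - 1))          /* group must start aligned */
        return 0;
      for (unsigned j = 1; j < chunk; ++j)
        if (perm[i] + j != perm[i + j])   /* and be contiguous */
          return 0;
    }
  return 1;
}
#endif
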
49162 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49163 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
49164
49165 static bool
49166 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
49167 {
49168 unsigned i, nelt, eltsz, mask;
49169 unsigned char perm[64];
49170 machine_mode vmode = V16QImode;
49171 rtx rperm[64], vperm, target, op0, op1;
49172
49173 nelt = d->nelt;
49174
49175 if (!d->one_operand_p)
49176 {
49177 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
49178 {
49179 if (TARGET_AVX2
49180 && valid_perm_using_mode_p (V2TImode, d))
49181 {
49182 if (d->testing_p)
49183 return true;
49184
49185 /* Use vperm2i128 insn. The pattern uses
49186 V4DImode instead of V2TImode. */
49187 target = d->target;
49188 if (d->vmode != V4DImode)
49189 target = gen_reg_rtx (V4DImode);
49190 op0 = gen_lowpart (V4DImode, d->op0);
49191 op1 = gen_lowpart (V4DImode, d->op1);
49192 rperm[0]
49193 = GEN_INT ((d->perm[0] / (nelt / 2))
49194 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
49195 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
49196 if (target != d->target)
49197 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49198 return true;
49199 }
49200 return false;
49201 }
49202 }
49203 else
49204 {
49205 if (GET_MODE_SIZE (d->vmode) == 16)
49206 {
49207 if (!TARGET_SSSE3)
49208 return false;
49209 }
49210 else if (GET_MODE_SIZE (d->vmode) == 32)
49211 {
49212 if (!TARGET_AVX2)
49213 return false;
49214
49215 /* V4DImode should be already handled through
49216 expand_vselect by vpermq instruction. */
49217 gcc_assert (d->vmode != V4DImode);
49218
49219 vmode = V32QImode;
49220 if (d->vmode == V8SImode
49221 || d->vmode == V16HImode
49222 || d->vmode == V32QImode)
49223 {
49224 /* First see if vpermq can be used for
49225 V8SImode/V16HImode/V32QImode. */
49226 if (valid_perm_using_mode_p (V4DImode, d))
49227 {
49228 for (i = 0; i < 4; i++)
49229 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
49230 if (d->testing_p)
49231 return true;
49232 target = gen_reg_rtx (V4DImode);
49233 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
49234 perm, 4, false))
49235 {
49236 emit_move_insn (d->target,
49237 gen_lowpart (d->vmode, target));
49238 return true;
49239 }
49240 return false;
49241 }
49242
49243 /* Next see if vpermd can be used. */
49244 if (valid_perm_using_mode_p (V8SImode, d))
49245 vmode = V8SImode;
49246 }
49247 /* Or if vpermps can be used. */
49248 else if (d->vmode == V8SFmode)
49249 vmode = V8SImode;
49250
49251 if (vmode == V32QImode)
49252 {
49253 /* vpshufb only works intra lanes; it is not
49254 possible to shuffle bytes between the lanes. */
49255 for (i = 0; i < nelt; ++i)
49256 if ((d->perm[i] ^ i) & (nelt / 2))
49257 return false;
49258 }
49259 }
49260 else if (GET_MODE_SIZE (d->vmode) == 64)
49261 {
49262 if (!TARGET_AVX512BW)
49263 return false;
49264
49265 /* If vpermq didn't work, vpshufb won't work either. */
49266 if (d->vmode == V8DFmode || d->vmode == V8DImode)
49267 return false;
49268
49269 vmode = V64QImode;
49270 if (d->vmode == V16SImode
49271 || d->vmode == V32HImode
49272 || d->vmode == V64QImode)
49273 {
49274 /* First see if vpermq can be used for
49275 V16SImode/V32HImode/V64QImode. */
49276 if (valid_perm_using_mode_p (V8DImode, d))
49277 {
49278 for (i = 0; i < 8; i++)
49279 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
49280 if (d->testing_p)
49281 return true;
49282 target = gen_reg_rtx (V8DImode);
49283 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
49284 perm, 8, false))
49285 {
49286 emit_move_insn (d->target,
49287 gen_lowpart (d->vmode, target));
49288 return true;
49289 }
49290 return false;
49291 }
49292
49293 /* Next see if vpermd can be used. */
49294 if (valid_perm_using_mode_p (V16SImode, d))
49295 vmode = V16SImode;
49296 }
49297 /* Or if vpermps can be used. */
49298 else if (d->vmode == V16SFmode)
49299 vmode = V16SImode;
49300 if (vmode == V64QImode)
49301 {
49302 /* vpshufb only works intra lanes; it is not
49303 possible to shuffle bytes between the lanes. */
49304 for (i = 0; i < nelt; ++i)
49305 if ((d->perm[i] ^ i) & (nelt / 4))
49306 return false;
49307 }
49308 }
49309 else
49310 return false;
49311 }
49312
49313 if (d->testing_p)
49314 return true;
49315
49316 if (vmode == V8SImode)
49317 for (i = 0; i < 8; ++i)
49318 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
49319 else if (vmode == V16SImode)
49320 for (i = 0; i < 16; ++i)
49321 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
49322 else
49323 {
49324 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
49325 if (!d->one_operand_p)
49326 mask = 2 * nelt - 1;
49327 else if (vmode == V16QImode)
49328 mask = nelt - 1;
49329 else if (vmode == V64QImode)
49330 mask = nelt / 4 - 1;
49331 else
49332 mask = nelt / 2 - 1;
49333
49334 for (i = 0; i < nelt; ++i)
49335 {
49336 unsigned j, e = d->perm[i] & mask;
49337 for (j = 0; j < eltsz; ++j)
49338 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
49339 }
49340 }
49341
49342 vperm = gen_rtx_CONST_VECTOR (vmode,
49343 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
49344 vperm = force_reg (vmode, vperm);
49345
49346 target = d->target;
49347 if (d->vmode != vmode)
49348 target = gen_reg_rtx (vmode);
49349 op0 = gen_lowpart (vmode, d->op0);
49350 if (d->one_operand_p)
49351 {
49352 if (vmode == V16QImode)
49353 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
49354 else if (vmode == V32QImode)
49355 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
49356 else if (vmode == V64QImode)
49357 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
49358 else if (vmode == V8SFmode)
49359 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
49360 else if (vmode == V8SImode)
49361 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
49362 else if (vmode == V16SFmode)
49363 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
49364 else if (vmode == V16SImode)
49365 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
49366 else
49367 gcc_unreachable ();
49368 }
49369 else
49370 {
49371 op1 = gen_lowpart (vmode, d->op1);
49372 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
49373 }
49374 if (target != d->target)
49375 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49376
49377 return true;
49378 }
49379
49380 /* For V*[QHS]Imode permutations, check whether the same permutation can
49381 be performed in a 2x, 4x or 8x wider inner mode, rewriting D into ND. */
49382
49383 static bool
49384 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
49385 struct expand_vec_perm_d *nd)
49386 {
49387 int i;
49388 enum machine_mode mode = VOIDmode;
49389
49390 switch (d->vmode)
49391 {
49392 case V16QImode: mode = V8HImode; break;
49393 case V32QImode: mode = V16HImode; break;
49394 case V64QImode: mode = V32HImode; break;
49395 case V8HImode: mode = V4SImode; break;
49396 case V16HImode: mode = V8SImode; break;
49397 case V32HImode: mode = V16SImode; break;
49398 case V4SImode: mode = V2DImode; break;
49399 case V8SImode: mode = V4DImode; break;
49400 case V16SImode: mode = V8DImode; break;
49401 default: return false;
49402 }
49403 for (i = 0; i < d->nelt; i += 2)
49404 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
49405 return false;
49406 nd->vmode = mode;
49407 nd->nelt = d->nelt / 2;
49408 for (i = 0; i < nd->nelt; i++)
49409 nd->perm[i] = d->perm[2 * i] / 2;
49410 if (GET_MODE_INNER (mode) != DImode)
49411 canonicalize_vector_int_perm (nd, nd);
49412 if (nd != d)
49413 {
49414 nd->one_operand_p = d->one_operand_p;
49415 nd->testing_p = d->testing_p;
49416 if (d->op0 == d->op1)
49417 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
49418 else
49419 {
49420 nd->op0 = gen_lowpart (nd->vmode, d->op0);
49421 nd->op1 = gen_lowpart (nd->vmode, d->op1);
49422 }
49423 if (d->testing_p)
49424 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
49425 else
49426 nd->target = gen_reg_rtx (nd->vmode);
49427 }
49428 return true;
49429 }
49430
49431 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
49432 in a single instruction. */
49433
49434 static bool
49435 expand_vec_perm_1 (struct expand_vec_perm_d *d)
49436 {
49437 unsigned i, nelt = d->nelt;
49438 struct expand_vec_perm_d nd;
49439
49440 /* Check plain VEC_SELECT first, because AVX has instructions that could
49441 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
49442 input where SEL+CONCAT may not. */
49443 if (d->one_operand_p)
49444 {
49445 int mask = nelt - 1;
49446 bool identity_perm = true;
49447 bool broadcast_perm = true;
49448
49449 for (i = 0; i < nelt; i++)
49450 {
49451 nd.perm[i] = d->perm[i] & mask;
49452 if (nd.perm[i] != i)
49453 identity_perm = false;
49454 if (nd.perm[i])
49455 broadcast_perm = false;
49456 }
49457
49458 if (identity_perm)
49459 {
49460 if (!d->testing_p)
49461 emit_move_insn (d->target, d->op0);
49462 return true;
49463 }
49464 else if (broadcast_perm && TARGET_AVX2)
49465 {
49466 /* Use vpbroadcast{b,w,d}. */
49467 rtx (*gen) (rtx, rtx) = NULL;
49468 switch (d->vmode)
49469 {
49470 case V64QImode:
49471 if (TARGET_AVX512BW)
49472 gen = gen_avx512bw_vec_dupv64qi_1;
49473 break;
49474 case V32QImode:
49475 gen = gen_avx2_pbroadcastv32qi_1;
49476 break;
49477 case V32HImode:
49478 if (TARGET_AVX512BW)
49479 gen = gen_avx512bw_vec_dupv32hi_1;
49480 break;
49481 case V16HImode:
49482 gen = gen_avx2_pbroadcastv16hi_1;
49483 break;
49484 case V16SImode:
49485 if (TARGET_AVX512F)
49486 gen = gen_avx512f_vec_dupv16si_1;
49487 break;
49488 case V8SImode:
49489 gen = gen_avx2_pbroadcastv8si_1;
49490 break;
49491 case V16QImode:
49492 gen = gen_avx2_pbroadcastv16qi;
49493 break;
49494 case V8HImode:
49495 gen = gen_avx2_pbroadcastv8hi;
49496 break;
49497 case V16SFmode:
49498 if (TARGET_AVX512F)
49499 gen = gen_avx512f_vec_dupv16sf_1;
49500 break;
49501 case V8SFmode:
49502 gen = gen_avx2_vec_dupv8sf_1;
49503 break;
49504 case V8DFmode:
49505 if (TARGET_AVX512F)
49506 gen = gen_avx512f_vec_dupv8df_1;
49507 break;
49508 case V8DImode:
49509 if (TARGET_AVX512F)
49510 gen = gen_avx512f_vec_dupv8di_1;
49511 break;
49512 /* For other modes prefer other shuffles this function creates. */
49513 default: break;
49514 }
49515 if (gen != NULL)
49516 {
49517 if (!d->testing_p)
49518 emit_insn (gen (d->target, d->op0));
49519 return true;
49520 }
49521 }
49522
49523 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
49524 return true;
49525
49526 /* There are plenty of patterns in sse.md that are written for
49527 SEL+CONCAT and are not replicated for a single op. Perhaps
49528 that should be changed, to avoid the nastiness here. */
49529
49530 /* Recognize interleave style patterns, which means incrementing
49531 every other permutation operand. */
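	  /* E.g. for one-operand V4SImode with d->perm = { 0, 0, 1, 1 } this
	     produces nd.perm = { 0, 4, 1, 5 }, which on the concatenation of
	     op0 with itself is exactly punpckldq.  */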
49532 for (i = 0; i < nelt; i += 2)
49533 {
49534 nd.perm[i] = d->perm[i] & mask;
49535 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
49536 }
49537 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49538 d->testing_p))
49539 return true;
49540
49541 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
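	  /* E.g. for one-operand V4SFmode with d->perm = { 2, 3, 2, 3 } this
	     produces nd.perm = { 2, 3, 6, 7 }, i.e. a shufps selecting
	     elements 2 and 3 from each copy of op0.  */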
49542 if (nelt >= 4)
49543 {
49544 for (i = 0; i < nelt; i += 4)
49545 {
49546 nd.perm[i + 0] = d->perm[i + 0] & mask;
49547 nd.perm[i + 1] = d->perm[i + 1] & mask;
49548 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
49549 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
49550 }
49551
49552 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49553 d->testing_p))
49554 return true;
49555 }
49556 }
49557
49558 /* Finally, try the fully general two operand permute. */
49559 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
49560 d->testing_p))
49561 return true;
49562
49563 /* Recognize interleave style patterns with reversed operands. */
49564 if (!d->one_operand_p)
49565 {
49566 for (i = 0; i < nelt; ++i)
49567 {
49568 unsigned e = d->perm[i];
49569 if (e >= nelt)
49570 e -= nelt;
49571 else
49572 e += nelt;
49573 nd.perm[i] = e;
49574 }
49575
49576 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
49577 d->testing_p))
49578 return true;
49579 }
49580
49581 /* Try the SSE4.1 blend variable merge instructions. */
49582 if (expand_vec_perm_blend (d))
49583 return true;
49584
49585 /* Try one of the AVX vpermil variable permutations. */
49586 if (expand_vec_perm_vpermil (d))
49587 return true;
49588
49589 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
49590 vpshufb, vpermd, vpermps or vpermq variable permutation. */
49591 if (expand_vec_perm_pshufb (d))
49592 return true;
49593
49594 /* Try the AVX2 vpalignr instruction. */
49595 if (expand_vec_perm_palignr (d, true))
49596 return true;
49597
49598 /* Try the AVX512F vpermi2 instructions. */
49599 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
49600 return true;
49601
49602 /* See if we can get the same permutation in a different vector integer
49603 mode. */
49604 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
49605 {
49606 if (!d->testing_p)
49607 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
49608 return true;
49609 }
49610 return false;
49611 }
49612
49613 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49614 in terms of a pair of pshuflw + pshufhw instructions. */
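/* E.g. the one-operand V8HImode permutation { 3, 1, 2, 0, 7, 5, 6, 4 }
   is handled by a pshuflw reordering the low quadword followed by a
   pshufhw reordering the high quadword.  */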
49615
49616 static bool
49617 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
49618 {
49619 unsigned char perm2[MAX_VECT_LEN];
49620 unsigned i;
49621 bool ok;
49622
49623 if (d->vmode != V8HImode || !d->one_operand_p)
49624 return false;
49625
49626 /* The two permutations only operate in 64-bit lanes. */
49627 for (i = 0; i < 4; ++i)
49628 if (d->perm[i] >= 4)
49629 return false;
49630 for (i = 4; i < 8; ++i)
49631 if (d->perm[i] < 4)
49632 return false;
49633
49634 if (d->testing_p)
49635 return true;
49636
49637 /* Emit the pshuflw. */
49638 memcpy (perm2, d->perm, 4);
49639 for (i = 4; i < 8; ++i)
49640 perm2[i] = i;
49641 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
49642 gcc_assert (ok);
49643
49644 /* Emit the pshufhw. */
49645 memcpy (perm2 + 4, d->perm + 4, 4);
49646 for (i = 0; i < 4; ++i)
49647 perm2[i] = i;
49648 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
49649 gcc_assert (ok);
49650
49651 return true;
49652 }
49653
49654 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49655 the permutation using the SSSE3 palignr instruction. This succeeds
49656 when all of the elements in PERM fit within one vector and we merely
49657 need to shift them down so that a single vector permutation has a
49658 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
49659 the vpalignr instruction by itself can perform the requested permutation. */
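/* E.g. for two V16QImode operands and PERM = { 3, 4, ..., 18 } a
   palignr of op1:op0 by 3 bytes already yields the desired result;
   for less regular selections it merely shifts the needed elements
   into a single vector for a follow-up one-operand permutation.  */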
49660
49661 static bool
49662 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
49663 {
49664 unsigned i, nelt = d->nelt;
49665 unsigned min, max, minswap, maxswap;
49666 bool in_order, ok, swap = false;
49667 rtx shift, target;
49668 struct expand_vec_perm_d dcopy;
49669
49670 /* Even with AVX, palignr only operates on 128-bit vectors;
49671 with AVX2, palignr operates on both 128-bit lanes. */
49672 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
49673 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
49674 return false;
49675
49676 min = 2 * nelt;
49677 max = 0;
49678 minswap = 2 * nelt;
49679 maxswap = 0;
49680 for (i = 0; i < nelt; ++i)
49681 {
49682 unsigned e = d->perm[i];
49683 unsigned eswap = d->perm[i] ^ nelt;
49684 if (GET_MODE_SIZE (d->vmode) == 32)
49685 {
49686 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
49687 eswap = e ^ (nelt / 2);
49688 }
49689 if (e < min)
49690 min = e;
49691 if (e > max)
49692 max = e;
49693 if (eswap < minswap)
49694 minswap = eswap;
49695 if (eswap > maxswap)
49696 maxswap = eswap;
49697 }
49698 if (min == 0
49699 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
49700 {
49701 if (d->one_operand_p
49702 || minswap == 0
49703 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
49704 ? nelt / 2 : nelt))
49705 return false;
49706 swap = true;
49707 min = minswap;
49708 max = maxswap;
49709 }
49710
49711 /* Given that we have SSSE3, we know we'll be able to implement the
49712 single operand permutation after the palignr with pshufb for
49713 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
49714 first. */
49715 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
49716 return true;
49717
49718 dcopy = *d;
49719 if (swap)
49720 {
49721 dcopy.op0 = d->op1;
49722 dcopy.op1 = d->op0;
49723 for (i = 0; i < nelt; ++i)
49724 dcopy.perm[i] ^= nelt;
49725 }
49726
49727 in_order = true;
49728 for (i = 0; i < nelt; ++i)
49729 {
49730 unsigned e = dcopy.perm[i];
49731 if (GET_MODE_SIZE (d->vmode) == 32
49732 && e >= nelt
49733 && (e & (nelt / 2 - 1)) < min)
49734 e = e - min - (nelt / 2);
49735 else
49736 e = e - min;
49737 if (e != i)
49738 in_order = false;
49739 dcopy.perm[i] = e;
49740 }
49741 dcopy.one_operand_p = true;
49742
49743 if (single_insn_only_p && !in_order)
49744 return false;
49745
49746 /* For AVX2, test whether we can permute the result in one instruction. */
49747 if (d->testing_p)
49748 {
49749 if (in_order)
49750 return true;
49751 dcopy.op1 = dcopy.op0;
49752 return expand_vec_perm_1 (&dcopy);
49753 }
49754
49755 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
49756 if (GET_MODE_SIZE (d->vmode) == 16)
49757 {
49758 target = gen_reg_rtx (TImode);
49759 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
49760 gen_lowpart (TImode, dcopy.op0), shift));
49761 }
49762 else
49763 {
49764 target = gen_reg_rtx (V2TImode);
49765 emit_insn (gen_avx2_palignrv2ti (target,
49766 gen_lowpart (V2TImode, dcopy.op1),
49767 gen_lowpart (V2TImode, dcopy.op0),
49768 shift));
49769 }
49770
49771 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
49772
49773 /* Test for the degenerate case where the alignment by itself
49774 produces the desired permutation. */
49775 if (in_order)
49776 {
49777 emit_move_insn (d->target, dcopy.op0);
49778 return true;
49779 }
49780
49781 ok = expand_vec_perm_1 (&dcopy);
49782 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
49783
49784 return ok;
49785 }
49786
49787 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
49788 the permutation using the SSE4_1 pblendv instruction. Potentially
49789 reduces the permutation from two pshufb + ior to one pshufb + pblendv. */
49790
49791 static bool
49792 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
49793 {
49794 unsigned i, which, nelt = d->nelt;
49795 struct expand_vec_perm_d dcopy, dcopy1;
49796 machine_mode vmode = d->vmode;
49797 bool ok;
49798
49799 /* Use the same checks as in expand_vec_perm_blend. */
49800 if (d->one_operand_p)
49801 return false;
49802 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
49803 ;
49804 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
49805 ;
49806 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
49807 ;
49808 else
49809 return false;
49810
49811 /* Figure out which permutation elements do not stay in their
49812 respective lanes. */
49813 for (i = 0, which = 0; i < nelt; ++i)
49814 {
49815 unsigned e = d->perm[i];
49816 if (e != i)
49817 which |= (e < nelt ? 1 : 2);
49818 }
49819 /* We can pblend the part where elements do not stay in their
49820 respective lanes only when these elements all come from the same
49821 half of the permutation.
49822 {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not in their respective
49823 lanes but both are >= 8;
49824 {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not in their
49825 respective lanes and 8 >= 8 while 2 is not. */
49826 if (which != 1 && which != 2)
49827 return false;
49828 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
49829 return true;
49830
49831 /* First we apply a one-operand permutation to the part where
49832 elements do not stay in their respective lanes. */
49833 dcopy = *d;
49834 if (which == 2)
49835 dcopy.op0 = dcopy.op1 = d->op1;
49836 else
49837 dcopy.op0 = dcopy.op1 = d->op0;
49838 if (!d->testing_p)
49839 dcopy.target = gen_reg_rtx (vmode);
49840 dcopy.one_operand_p = true;
49841
49842 for (i = 0; i < nelt; ++i)
49843 dcopy.perm[i] = d->perm[i] & (nelt - 1);
49844
49845 ok = expand_vec_perm_1 (&dcopy);
49846 if (GET_MODE_SIZE (vmode) != 16 && !ok)
49847 return false;
49848 else
49849 gcc_assert (ok);
49850 if (d->testing_p)
49851 return true;
49852
49853 /* Next we put permuted elements into their positions. */
49854 dcopy1 = *d;
49855 if (which == 2)
49856 dcopy1.op1 = dcopy.target;
49857 else
49858 dcopy1.op0 = dcopy.target;
49859
49860 for (i = 0; i < nelt; ++i)
49861 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
49862
49863 ok = expand_vec_perm_blend (&dcopy1);
49864 gcc_assert (ok);
49865
49866 return true;
49867 }
49868
49869 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
49870
49871 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49872 a two vector permutation into a single vector permutation by using
49873 an interleave operation to merge the vectors. */
49874
49875 static bool
49876 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
49877 {
49878 struct expand_vec_perm_d dremap, dfinal;
49879 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
49880 unsigned HOST_WIDE_INT contents;
49881 unsigned char remap[2 * MAX_VECT_LEN];
49882 rtx_insn *seq;
49883 bool ok, same_halves = false;
49884
49885 if (GET_MODE_SIZE (d->vmode) == 16)
49886 {
49887 if (d->one_operand_p)
49888 return false;
49889 }
49890 else if (GET_MODE_SIZE (d->vmode) == 32)
49891 {
49892 if (!TARGET_AVX)
49893 return false;
49894 /* For 32-byte modes, allow even the d->one_operand_p case.
49895 The lack of cross-lane shuffling in some instructions
49896 might prevent a single insn shuffle. */
49897 dfinal = *d;
49898 dfinal.testing_p = true;
49899 /* If expand_vec_perm_interleave3 can expand this into
49900 a 3 insn sequence, give up and let it be expanded as
49901 a 3 insn sequence. While that is one insn longer,
49902 it doesn't need a memory operand, and in the common
49903 case where both the interleave low and interleave high
49904 permutations with the same operands are adjacent, it needs
49905 only 4 insns for both after CSE. */
49906 if (expand_vec_perm_interleave3 (&dfinal))
49907 return false;
49908 }
49909 else
49910 return false;
49911
49912 /* Examine from whence the elements come. */
49913 contents = 0;
49914 for (i = 0; i < nelt; ++i)
49915 contents |= HOST_WIDE_INT_1U << d->perm[i];
49916
49917 memset (remap, 0xff, sizeof (remap));
49918 dremap = *d;
49919
49920 if (GET_MODE_SIZE (d->vmode) == 16)
49921 {
49922 unsigned HOST_WIDE_INT h1, h2, h3, h4;
49923
49924 /* Split the two input vectors into 4 halves. */
49925 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
49926 h2 = h1 << nelt2;
49927 h3 = h2 << nelt2;
49928 h4 = h3 << nelt2;
49929
49930 /* If all the elements come from the low halves, use interleave low;
49931 similarly for interleave high. If the elements come from mismatched
49932 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
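	  /* E.g. for two V4SImode operands and d->perm = { 1, 5, 0, 4 }:
	     punpckldq yields { 0, 4, 1, 5 } and the final one-operand
	     shuffle on that result is { 2, 3, 0, 1 }, a single pshufd.  */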
49933 if ((contents & (h1 | h3)) == contents)
49934 {
49935 /* punpckl* */
49936 for (i = 0; i < nelt2; ++i)
49937 {
49938 remap[i] = i * 2;
49939 remap[i + nelt] = i * 2 + 1;
49940 dremap.perm[i * 2] = i;
49941 dremap.perm[i * 2 + 1] = i + nelt;
49942 }
49943 if (!TARGET_SSE2 && d->vmode == V4SImode)
49944 dremap.vmode = V4SFmode;
49945 }
49946 else if ((contents & (h2 | h4)) == contents)
49947 {
49948 /* punpckh* */
49949 for (i = 0; i < nelt2; ++i)
49950 {
49951 remap[i + nelt2] = i * 2;
49952 remap[i + nelt + nelt2] = i * 2 + 1;
49953 dremap.perm[i * 2] = i + nelt2;
49954 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
49955 }
49956 if (!TARGET_SSE2 && d->vmode == V4SImode)
49957 dremap.vmode = V4SFmode;
49958 }
49959 else if ((contents & (h1 | h4)) == contents)
49960 {
49961 /* shufps */
49962 for (i = 0; i < nelt2; ++i)
49963 {
49964 remap[i] = i;
49965 remap[i + nelt + nelt2] = i + nelt2;
49966 dremap.perm[i] = i;
49967 dremap.perm[i + nelt2] = i + nelt + nelt2;
49968 }
49969 if (nelt != 4)
49970 {
49971 /* shufpd */
49972 dremap.vmode = V2DImode;
49973 dremap.nelt = 2;
49974 dremap.perm[0] = 0;
49975 dremap.perm[1] = 3;
49976 }
49977 }
49978 else if ((contents & (h2 | h3)) == contents)
49979 {
49980 /* shufps */
49981 for (i = 0; i < nelt2; ++i)
49982 {
49983 remap[i + nelt2] = i;
49984 remap[i + nelt] = i + nelt2;
49985 dremap.perm[i] = i + nelt2;
49986 dremap.perm[i + nelt2] = i + nelt;
49987 }
49988 if (nelt != 4)
49989 {
49990 /* shufpd */
49991 dremap.vmode = V2DImode;
49992 dremap.nelt = 2;
49993 dremap.perm[0] = 1;
49994 dremap.perm[1] = 2;
49995 }
49996 }
49997 else
49998 return false;
49999 }
50000 else
50001 {
50002 unsigned int nelt4 = nelt / 4, nzcnt = 0;
50003 unsigned HOST_WIDE_INT q[8];
50004 unsigned int nonzero_halves[4];
50005
50006 /* Split the two input vectors into 8 quarters. */
50007 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
50008 for (i = 1; i < 8; ++i)
50009 q[i] = q[0] << (nelt4 * i);
50010 for (i = 0; i < 4; ++i)
50011 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
50012 {
50013 nonzero_halves[nzcnt] = i;
50014 ++nzcnt;
50015 }
50016
50017 if (nzcnt == 1)
50018 {
50019 gcc_assert (d->one_operand_p);
50020 nonzero_halves[1] = nonzero_halves[0];
50021 same_halves = true;
50022 }
50023 else if (d->one_operand_p)
50024 {
50025 gcc_assert (nonzero_halves[0] == 0);
50026 gcc_assert (nonzero_halves[1] == 1);
50027 }
50028
50029 if (nzcnt <= 2)
50030 {
50031 if (d->perm[0] / nelt2 == nonzero_halves[1])
50032 {
50033 /* Attempt to increase the likelihood that dfinal
50034 shuffle will be intra-lane. */
50035 std::swap (nonzero_halves[0], nonzero_halves[1]);
50036 }
50037
50038 /* vperm2f128 or vperm2i128. */
50039 for (i = 0; i < nelt2; ++i)
50040 {
50041 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
50042 remap[i + nonzero_halves[0] * nelt2] = i;
50043 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
50044 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
50045 }
50046
50047 if (d->vmode != V8SFmode
50048 && d->vmode != V4DFmode
50049 && d->vmode != V8SImode)
50050 {
50051 dremap.vmode = V8SImode;
50052 dremap.nelt = 8;
50053 for (i = 0; i < 4; ++i)
50054 {
50055 dremap.perm[i] = i + nonzero_halves[0] * 4;
50056 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
50057 }
50058 }
50059 }
50060 else if (d->one_operand_p)
50061 return false;
50062 else if (TARGET_AVX2
50063 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
50064 {
50065 /* vpunpckl* */
50066 for (i = 0; i < nelt4; ++i)
50067 {
50068 remap[i] = i * 2;
50069 remap[i + nelt] = i * 2 + 1;
50070 remap[i + nelt2] = i * 2 + nelt2;
50071 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
50072 dremap.perm[i * 2] = i;
50073 dremap.perm[i * 2 + 1] = i + nelt;
50074 dremap.perm[i * 2 + nelt2] = i + nelt2;
50075 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
50076 }
50077 }
50078 else if (TARGET_AVX2
50079 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
50080 {
50081 /* vpunpckh* */
50082 for (i = 0; i < nelt4; ++i)
50083 {
50084 remap[i + nelt4] = i * 2;
50085 remap[i + nelt + nelt4] = i * 2 + 1;
50086 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
50087 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
50088 dremap.perm[i * 2] = i + nelt4;
50089 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
50090 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
50091 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
50092 }
50093 }
50094 else
50095 return false;
50096 }
50097
50098 /* Use the remapping array set up above to move the elements from their
50099 swizzled locations into their final destinations. */
50100 dfinal = *d;
50101 for (i = 0; i < nelt; ++i)
50102 {
50103 unsigned e = remap[d->perm[i]];
50104 gcc_assert (e < nelt);
50105 /* If same_halves is true, both halves of the remapped vector are the
50106 same. Avoid cross-lane accesses if possible. */
50107 if (same_halves && i >= nelt2)
50108 {
50109 gcc_assert (e < nelt2);
50110 dfinal.perm[i] = e + nelt2;
50111 }
50112 else
50113 dfinal.perm[i] = e;
50114 }
50115 if (!d->testing_p)
50116 {
50117 dremap.target = gen_reg_rtx (dremap.vmode);
50118 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50119 }
50120 dfinal.op1 = dfinal.op0;
50121 dfinal.one_operand_p = true;
50122
50123 /* Test if the final remap can be done with a single insn. For V4SFmode or
50124 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
50125 start_sequence ();
50126 ok = expand_vec_perm_1 (&dfinal);
50127 seq = get_insns ();
50128 end_sequence ();
50129
50130 if (!ok)
50131 return false;
50132
50133 if (d->testing_p)
50134 return true;
50135
50136 if (dremap.vmode != dfinal.vmode)
50137 {
50138 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
50139 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
50140 }
50141
50142 ok = expand_vec_perm_1 (&dremap);
50143 gcc_assert (ok);
50144
50145 emit_insn (seq);
50146 return true;
50147 }
50148
50149 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50150 a single vector cross-lane permutation into vpermq followed
50151 by any of the single insn permutations. */
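/* E.g. if the low 16 result bytes of a V32QImode permutation use only
   source quarters 0 and 3 while the high 16 bytes use only quarters 1
   and 2, the vpermq first gathers those quarters into the matching
   128-bit lanes, after which an in-lane shuffle such as vpshufb can
   finish the permutation.  */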
50152
50153 static bool
50154 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
50155 {
50156 struct expand_vec_perm_d dremap, dfinal;
50157 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
50158 unsigned contents[2];
50159 bool ok;
50160
50161 if (!(TARGET_AVX2
50162 && (d->vmode == V32QImode || d->vmode == V16HImode)
50163 && d->one_operand_p))
50164 return false;
50165
50166 contents[0] = 0;
50167 contents[1] = 0;
50168 for (i = 0; i < nelt2; ++i)
50169 {
50170 contents[0] |= 1u << (d->perm[i] / nelt4);
50171 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
50172 }
50173
50174 for (i = 0; i < 2; ++i)
50175 {
50176 unsigned int cnt = 0;
50177 for (j = 0; j < 4; ++j)
50178 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
50179 return false;
50180 }
50181
50182 if (d->testing_p)
50183 return true;
50184
50185 dremap = *d;
50186 dremap.vmode = V4DImode;
50187 dremap.nelt = 4;
50188 dremap.target = gen_reg_rtx (V4DImode);
50189 dremap.op0 = gen_lowpart (V4DImode, d->op0);
50190 dremap.op1 = dremap.op0;
50191 dremap.one_operand_p = true;
50192 for (i = 0; i < 2; ++i)
50193 {
50194 unsigned int cnt = 0;
50195 for (j = 0; j < 4; ++j)
50196 if ((contents[i] & (1u << j)) != 0)
50197 dremap.perm[2 * i + cnt++] = j;
50198 for (; cnt < 2; ++cnt)
50199 dremap.perm[2 * i + cnt] = 0;
50200 }
50201
50202 dfinal = *d;
50203 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50204 dfinal.op1 = dfinal.op0;
50205 dfinal.one_operand_p = true;
50206 for (i = 0, j = 0; i < nelt; ++i)
50207 {
50208 if (i == nelt2)
50209 j = 2;
50210 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
50211 if ((d->perm[i] / nelt4) == dremap.perm[j])
50212 ;
50213 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
50214 dfinal.perm[i] |= nelt4;
50215 else
50216 gcc_unreachable ();
50217 }
50218
50219 ok = expand_vec_perm_1 (&dremap);
50220 gcc_assert (ok);
50221
50222 ok = expand_vec_perm_1 (&dfinal);
50223 gcc_assert (ok);
50224
50225 return true;
50226 }
50227
50228 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
50229 a vector permutation using two instructions, vperm2f128 resp.
50230 vperm2i128 followed by any single in-lane permutation. */
50231
50232 static bool
50233 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
50234 {
50235 struct expand_vec_perm_d dfirst, dsecond;
50236 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
50237 bool ok;
50238
50239 if (!TARGET_AVX
50240 || GET_MODE_SIZE (d->vmode) != 32
50241 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
50242 return false;
50243
50244 dsecond = *d;
50245 dsecond.one_operand_p = false;
50246 dsecond.testing_p = true;
50247
50248 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
50249 immediate. For perm < 16 the second permutation uses
50250 d->op0 as its first operand; for perm >= 16 it uses d->op1
50251 as its first operand. The second operand is the result of
50252 vperm2[fi]128. */
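  /* E.g. perm == 9 (0b1001) selects lane 1 (the high lane of d->op0)
     for the low result lane and lane 2 (the low lane of d->op1) for the
     high result lane; the immediate is ((9 << 2) | 9) & 0x33 == 0x21,
     and since perm < 16 the second shuffle uses d->op0 as its first
     operand.  */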
50253 for (perm = 0; perm < 32; perm++)
50254 {
50255 /* Ignore permutations which do not move anything cross-lane. */
50256 if (perm < 16)
50257 {
50258 /* The second shuffle for e.g. V4DFmode has
50259 0123 and ABCD operands.
50260 Ignore AB23, as 23 is already in the second lane
50261 of the first operand. */
50262 if ((perm & 0xc) == (1 << 2)) continue;
50263 /* And 01CD, as 01 is in the first lane of the first
50264 operand. */
50265 if ((perm & 3) == 0) continue;
50266 /* And 4567, as then the vperm2[fi]128 doesn't change
50267 anything on the original 4567 second operand. */
50268 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
50269 }
50270 else
50271 {
50272 /* The second shuffle for e.g. V4DFmode has
50273 4567 and ABCD operands.
50274 Ignore AB67, as 67 is already in the second lane
50275 of the first operand. */
50276 if ((perm & 0xc) == (3 << 2)) continue;
50277 /* And 45CD, as 45 is in the first lane of the first
50278 operand. */
50279 if ((perm & 3) == 2) continue;
50280 /* And 0123, as then the vperm2[fi]128 doesn't change
50281 anything on the original 0123 first operand. */
50282 if ((perm & 0xf) == (1 << 2)) continue;
50283 }
50284
50285 for (i = 0; i < nelt; i++)
50286 {
50287 j = d->perm[i] / nelt2;
50288 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
50289 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
50290 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
50291 dsecond.perm[i] = d->perm[i] & (nelt - 1);
50292 else
50293 break;
50294 }
50295
50296 if (i == nelt)
50297 {
50298 start_sequence ();
50299 ok = expand_vec_perm_1 (&dsecond);
50300 end_sequence ();
50301 }
50302 else
50303 ok = false;
50304
50305 if (ok)
50306 {
50307 if (d->testing_p)
50308 return true;
50309
50310 /* Found a usable second shuffle. dfirst will be
50311 vperm2f128 on d->op0 and d->op1. */
50312 dsecond.testing_p = false;
50313 dfirst = *d;
50314 dfirst.target = gen_reg_rtx (d->vmode);
50315 for (i = 0; i < nelt; i++)
50316 dfirst.perm[i] = (i & (nelt2 - 1))
50317 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
50318
50319 canonicalize_perm (&dfirst);
50320 ok = expand_vec_perm_1 (&dfirst);
50321 gcc_assert (ok);
50322
50323 /* And dsecond is some single insn shuffle, taking
50324 d->op0 and result of vperm2f128 (if perm < 16) or
50325 d->op1 and result of vperm2f128 (otherwise). */
50326 if (perm >= 16)
50327 dsecond.op0 = dsecond.op1;
50328 dsecond.op1 = dfirst.target;
50329
50330 ok = expand_vec_perm_1 (&dsecond);
50331 gcc_assert (ok);
50332
50333 return true;
50334 }
50335
50336 /* For one operand, the only useful vperm2f128 permutation is 0x01
50337 aka lanes swap. */
50338 if (d->one_operand_p)
50339 return false;
50340 }
50341
50342 return false;
50343 }
50344
50345 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50346 a two vector permutation using 2 intra-lane interleave insns
50347 and cross-lane shuffle for 32-byte vectors. */
50348
50349 static bool
50350 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
50351 {
50352 unsigned i, nelt;
50353 rtx (*gen) (rtx, rtx, rtx);
50354
50355 if (d->one_operand_p)
50356 return false;
50357 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
50358 ;
50359 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
50360 ;
50361 else
50362 return false;
50363
50364 nelt = d->nelt;
50365 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
50366 return false;
50367 for (i = 0; i < nelt; i += 2)
50368 if (d->perm[i] != d->perm[0] + i / 2
50369 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
50370 return false;
50371
50372 if (d->testing_p)
50373 return true;
50374
50375 switch (d->vmode)
50376 {
50377 case V32QImode:
50378 if (d->perm[0])
50379 gen = gen_vec_interleave_highv32qi;
50380 else
50381 gen = gen_vec_interleave_lowv32qi;
50382 break;
50383 case V16HImode:
50384 if (d->perm[0])
50385 gen = gen_vec_interleave_highv16hi;
50386 else
50387 gen = gen_vec_interleave_lowv16hi;
50388 break;
50389 case V8SImode:
50390 if (d->perm[0])
50391 gen = gen_vec_interleave_highv8si;
50392 else
50393 gen = gen_vec_interleave_lowv8si;
50394 break;
50395 case V4DImode:
50396 if (d->perm[0])
50397 gen = gen_vec_interleave_highv4di;
50398 else
50399 gen = gen_vec_interleave_lowv4di;
50400 break;
50401 case V8SFmode:
50402 if (d->perm[0])
50403 gen = gen_vec_interleave_highv8sf;
50404 else
50405 gen = gen_vec_interleave_lowv8sf;
50406 break;
50407 case V4DFmode:
50408 if (d->perm[0])
50409 gen = gen_vec_interleave_highv4df;
50410 else
50411 gen = gen_vec_interleave_lowv4df;
50412 break;
50413 default:
50414 gcc_unreachable ();
50415 }
50416
50417 emit_insn (gen (d->target, d->op0, d->op1));
50418 return true;
50419 }
50420
50421 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
50422 a single vector permutation using a single intra-lane vector
50423 permutation, vperm2f128 swapping the lanes and vblend* insn blending
50424 the non-swapped and swapped vectors together. */
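/* E.g. the one-operand V8SFmode permutation { 4 1 6 3 0 5 2 7 }: the
   intra-lane permutation is the identity, vperm2f128 swaps the two
   lanes, and a vblendps with mask 0x55 takes the swapped vector in
   the even positions and the unswapped one in the odd positions.  */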
50425
50426 static bool
50427 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
50428 {
50429 struct expand_vec_perm_d dfirst, dsecond;
50430 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
50431 rtx_insn *seq;
50432 bool ok;
50433 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
50434
50435 if (!TARGET_AVX
50436 || TARGET_AVX2
50437 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
50438 || !d->one_operand_p)
50439 return false;
50440
50441 dfirst = *d;
50442 for (i = 0; i < nelt; i++)
50443 dfirst.perm[i] = 0xff;
50444 for (i = 0, msk = 0; i < nelt; i++)
50445 {
50446 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
50447 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
50448 return false;
50449 dfirst.perm[j] = d->perm[i];
50450 if (j != i)
50451 msk |= (1 << i);
50452 }
50453 for (i = 0; i < nelt; i++)
50454 if (dfirst.perm[i] == 0xff)
50455 dfirst.perm[i] = i;
50456
50457 if (!d->testing_p)
50458 dfirst.target = gen_reg_rtx (dfirst.vmode);
50459
50460 start_sequence ();
50461 ok = expand_vec_perm_1 (&dfirst);
50462 seq = get_insns ();
50463 end_sequence ();
50464
50465 if (!ok)
50466 return false;
50467
50468 if (d->testing_p)
50469 return true;
50470
50471 emit_insn (seq);
50472
50473 dsecond = *d;
50474 dsecond.op0 = dfirst.target;
50475 dsecond.op1 = dfirst.target;
50476 dsecond.one_operand_p = true;
50477 dsecond.target = gen_reg_rtx (dsecond.vmode);
50478 for (i = 0; i < nelt; i++)
50479 dsecond.perm[i] = i ^ nelt2;
50480
50481 ok = expand_vec_perm_1 (&dsecond);
50482 gcc_assert (ok);
50483
50484 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
50485 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
50486 return true;
50487 }
50488
50489 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
50490 permutation using two vperm2f128, followed by a vshufpd insn blending
50491 the two vectors together. */
50492
50493 static bool
50494 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
50495 {
50496 struct expand_vec_perm_d dfirst, dsecond, dthird;
50497 bool ok;
50498
50499 if (!TARGET_AVX || (d->vmode != V4DFmode))
50500 return false;
50501
50502 if (d->testing_p)
50503 return true;
50504
50505 dfirst = *d;
50506 dsecond = *d;
50507 dthird = *d;
50508
50509 dfirst.perm[0] = (d->perm[0] & ~1);
50510 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
50511 dfirst.perm[2] = (d->perm[2] & ~1);
50512 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
50513 dsecond.perm[0] = (d->perm[1] & ~1);
50514 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
50515 dsecond.perm[2] = (d->perm[3] & ~1);
50516 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
50517 dthird.perm[0] = (d->perm[0] % 2);
50518 dthird.perm[1] = (d->perm[1] % 2) + 4;
50519 dthird.perm[2] = (d->perm[2] % 2) + 2;
50520 dthird.perm[3] = (d->perm[3] % 2) + 6;
50521
50522 dfirst.target = gen_reg_rtx (dfirst.vmode);
50523 dsecond.target = gen_reg_rtx (dsecond.vmode);
50524 dthird.op0 = dfirst.target;
50525 dthird.op1 = dsecond.target;
50526 dthird.one_operand_p = false;
50527
50528 canonicalize_perm (&dfirst);
50529 canonicalize_perm (&dsecond);
50530
50531 ok = expand_vec_perm_1 (&dfirst)
50532 && expand_vec_perm_1 (&dsecond)
50533 && expand_vec_perm_1 (&dthird);
50534
50535 gcc_assert (ok);
50536
50537 return true;
50538 }
50539
50540 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
50541 permutation with two pshufb insns and an ior. We should have already
50542 failed all two instruction sequences. */
50543
50544 static bool
50545 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
50546 {
50547 rtx rperm[2][16], vperm, l, h, op, m128;
50548 unsigned int i, nelt, eltsz;
50549
50550 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
50551 return false;
50552 gcc_assert (!d->one_operand_p);
50553
50554 if (d->testing_p)
50555 return true;
50556
50557 nelt = d->nelt;
50558 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50559
50560 /* Generate two permutation masks. If the required element is within
50561 the given vector, it is shuffled into the proper lane. If the required
50562 element is in the other vector, force a zero into the lane by setting
50563 bit 7 in the permutation mask. */
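  /* E.g. for V16QImode, if d->perm[0] == 20 then rperm[1][0] selects
     byte 4 of d->op1 while rperm[0][0] is -128, so the pshufb of op0
     contributes a zero and the ior picks up byte 4 of op1 for the
     first result byte.  */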
50564 m128 = GEN_INT (-128);
50565 for (i = 0; i < nelt; ++i)
50566 {
50567 unsigned j, e = d->perm[i];
50568 unsigned which = (e >= nelt);
50569 if (e >= nelt)
50570 e -= nelt;
50571
50572 for (j = 0; j < eltsz; ++j)
50573 {
50574 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
50575 rperm[1-which][i*eltsz + j] = m128;
50576 }
50577 }
50578
50579 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
50580 vperm = force_reg (V16QImode, vperm);
50581
50582 l = gen_reg_rtx (V16QImode);
50583 op = gen_lowpart (V16QImode, d->op0);
50584 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
50585
50586 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
50587 vperm = force_reg (V16QImode, vperm);
50588
50589 h = gen_reg_rtx (V16QImode);
50590 op = gen_lowpart (V16QImode, d->op1);
50591 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
50592
50593 op = d->target;
50594 if (d->vmode != V16QImode)
50595 op = gen_reg_rtx (V16QImode);
50596 emit_insn (gen_iorv16qi3 (op, l, h));
50597 if (op != d->target)
50598 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50599
50600 return true;
50601 }
50602
50603 /* Implement an arbitrary permutation of a single V32QImode or V16HImode
50604 operand with two vpshufb insns, a vpermq and a vpor. We should have
50605 already failed all two or three instruction sequences. */
50606
50607 static bool
50608 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
50609 {
50610 rtx rperm[2][32], vperm, l, h, hp, op, m128;
50611 unsigned int i, nelt, eltsz;
50612
50613 if (!TARGET_AVX2
50614 || !d->one_operand_p
50615 || (d->vmode != V32QImode && d->vmode != V16HImode))
50616 return false;
50617
50618 if (d->testing_p)
50619 return true;
50620
50621 nelt = d->nelt;
50622 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50623
50624 /* Generate two permutation masks. If the required element is within
50625 the same lane, it is shuffled in. If the required element is from the
50626 other lane, force a zero by setting bit 7 in the permutation mask.
50627 The other mask has a non-negative element whenever an element is
50628 requested from the other lane, but that element is also moved to the
50629 other lane, so that after the vpshufb the two V2TImode halves can
50630 simply be swapped. */
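  /* E.g. for V32QImode with d->perm[3] == 20: the cross-lane mask places
     byte 20 of op0 at position 19, the vpermq lane swap then moves it to
     position 3, while the in-lane mask leaves a zero at position 3, so
     the final ior produces byte 20 there.  */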
50631 m128 = GEN_INT (-128);
50632 for (i = 0; i < nelt; ++i)
50633 {
50634 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50635 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
50636
50637 for (j = 0; j < eltsz; ++j)
50638 {
50639 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
50640 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
50641 }
50642 }
50643
50644 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50645 vperm = force_reg (V32QImode, vperm);
50646
50647 h = gen_reg_rtx (V32QImode);
50648 op = gen_lowpart (V32QImode, d->op0);
50649 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50650
50651 /* Swap the 128-bit lanes of h into hp. */
50652 hp = gen_reg_rtx (V4DImode);
50653 op = gen_lowpart (V4DImode, h);
50654 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
50655 const1_rtx));
50656
50657 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50658 vperm = force_reg (V32QImode, vperm);
50659
50660 l = gen_reg_rtx (V32QImode);
50661 op = gen_lowpart (V32QImode, d->op0);
50662 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
50663
50664 op = d->target;
50665 if (d->vmode != V32QImode)
50666 op = gen_reg_rtx (V32QImode);
50667 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
50668 if (op != d->target)
50669 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50670
50671 return true;
50672 }
50673
50674 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50675 and extract-odd permutations of two V32QImode or V16HImode operands
50676 with two vpshufb insns, a vpor and a vpermq. We should have already
50677 failed all two or three instruction sequences. */
50678
50679 static bool
50680 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
50681 {
50682 rtx rperm[2][32], vperm, l, h, ior, op, m128;
50683 unsigned int i, nelt, eltsz;
50684
50685 if (!TARGET_AVX2
50686 || d->one_operand_p
50687 || (d->vmode != V32QImode && d->vmode != V16HImode))
50688 return false;
50689
50690 for (i = 0; i < d->nelt; ++i)
50691 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
50692 return false;
50693
50694 if (d->testing_p)
50695 return true;
50696
50697 nelt = d->nelt;
50698 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50699
50700 /* Generate two permutation masks. In the first permutation mask
50701 the first quarter will contain indexes for the first half
50702 of op0, the second quarter will have bit 7 set, the third quarter
50703 will contain indexes for the second half of op0, and the
50704 last quarter will have bit 7 set. In the second permutation mask
50705 the first quarter will have bit 7 set, the second quarter
50706 indexes for the first half of op1, the third quarter bit 7 set,
50707 and the last quarter indexes for the second half of op1.
50708 I.e. the first mask e.g. for V32QImode extract even will be:
50709 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
50710 (all values masked with 0xf except for -128) and second mask
50711 for extract even will be
50712 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
50713 m128 = GEN_INT (-128);
50714 for (i = 0; i < nelt; ++i)
50715 {
50716 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50717 unsigned which = d->perm[i] >= nelt;
50718 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
50719
50720 for (j = 0; j < eltsz; ++j)
50721 {
50722 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
50723 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
50724 }
50725 }
50726
50727 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50728 vperm = force_reg (V32QImode, vperm);
50729
50730 l = gen_reg_rtx (V32QImode);
50731 op = gen_lowpart (V32QImode, d->op0);
50732 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
50733
50734 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50735 vperm = force_reg (V32QImode, vperm);
50736
50737 h = gen_reg_rtx (V32QImode);
50738 op = gen_lowpart (V32QImode, d->op1);
50739 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50740
50741 ior = gen_reg_rtx (V32QImode);
50742 emit_insn (gen_iorv32qi3 (ior, l, h));
50743
50744 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
50745 op = gen_reg_rtx (V4DImode);
50746 ior = gen_lowpart (V4DImode, ior);
50747 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
50748 const1_rtx, GEN_INT (3)));
50749 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50750
50751 return true;
50752 }
50753
50754 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50755 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
50756 with two "and" and "pack" or two "shift" and "pack" insns. We should
50757 have already failed all two instruction sequences. */
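/* E.g. for the V16QImode extract-even case both operands are viewed
   as V8HImode, "and"-ed with 0x00ff to clear the odd bytes, and a
   packuswb then packs the surviving low bytes of op0 followed by
   those of op1; the extract-odd case shifts each word right by 8
   instead of masking.  */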
50758
50759 static bool
50760 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
50761 {
50762 rtx op, dop0, dop1, t, rperm[16];
50763 unsigned i, odd, c, s, nelt = d->nelt;
50764 bool end_perm = false;
50765 machine_mode half_mode;
50766 rtx (*gen_and) (rtx, rtx, rtx);
50767 rtx (*gen_pack) (rtx, rtx, rtx);
50768 rtx (*gen_shift) (rtx, rtx, rtx);
50769
50770 if (d->one_operand_p)
50771 return false;
50772
50773 switch (d->vmode)
50774 {
50775 case V8HImode:
50776 /* Required for "pack". */
50777 if (!TARGET_SSE4_1)
50778 return false;
50779 c = 0xffff;
50780 s = 16;
50781 half_mode = V4SImode;
50782 gen_and = gen_andv4si3;
50783 gen_pack = gen_sse4_1_packusdw;
50784 gen_shift = gen_lshrv4si3;
50785 break;
50786 case V16QImode:
50787 /* No check as all instructions are SSE2. */
50788 c = 0xff;
50789 s = 8;
50790 half_mode = V8HImode;
50791 gen_and = gen_andv8hi3;
50792 gen_pack = gen_sse2_packuswb;
50793 gen_shift = gen_lshrv8hi3;
50794 break;
50795 case V16HImode:
50796 if (!TARGET_AVX2)
50797 return false;
50798 c = 0xffff;
50799 s = 16;
50800 half_mode = V8SImode;
50801 gen_and = gen_andv8si3;
50802 gen_pack = gen_avx2_packusdw;
50803 gen_shift = gen_lshrv8si3;
50804 end_perm = true;
50805 break;
50806 case V32QImode:
50807 if (!TARGET_AVX2)
50808 return false;
50809 c = 0xff;
50810 s = 8;
50811 half_mode = V16HImode;
50812 gen_and = gen_andv16hi3;
50813 gen_pack = gen_avx2_packuswb;
50814 gen_shift = gen_lshrv16hi3;
50815 end_perm = true;
50816 break;
50817 default:
50818 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
50819 general shuffles. */
50820 return false;
50821 }
50822
50823 /* Check that permutation is even or odd. */
50824 odd = d->perm[0];
50825 if (odd > 1)
50826 return false;
50827
50828 for (i = 1; i < nelt; ++i)
50829 if (d->perm[i] != 2 * i + odd)
50830 return false;
50831
50832 if (d->testing_p)
50833 return true;
50834
50835 dop0 = gen_reg_rtx (half_mode);
50836 dop1 = gen_reg_rtx (half_mode);
50837 if (odd == 0)
50838 {
50839 for (i = 0; i < nelt / 2; i++)
50840 rperm[i] = GEN_INT (c);
50841 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
50842 t = force_reg (half_mode, t);
50843 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
50844 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
50845 }
50846 else
50847 {
50848 emit_insn (gen_shift (dop0,
50849 gen_lowpart (half_mode, d->op0),
50850 GEN_INT (s)));
50851 emit_insn (gen_shift (dop1,
50852 gen_lowpart (half_mode, d->op1),
50853 GEN_INT (s)));
50854 }
50855 /* For the AVX2 256-bit case we need to permute the pack result. */
50856 if (TARGET_AVX2 && end_perm)
50857 {
50858 op = gen_reg_rtx (d->vmode);
50859 t = gen_reg_rtx (V4DImode);
50860 emit_insn (gen_pack (op, dop0, dop1));
50861 emit_insn (gen_avx2_permv4di_1 (t,
50862 gen_lowpart (V4DImode, op),
50863 const0_rtx,
50864 const2_rtx,
50865 const1_rtx,
50866 GEN_INT (3)));
50867 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
50868 }
50869 else
50870 emit_insn (gen_pack (d->target, dop0, dop1));
50871
50872 return true;
50873 }
50874
50875 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50876 and extract-odd permutations of two V64QI operands
50877 with two "shift", two "trunc" and one "concat" insns for "odd",
50878 and two "trunc" and one "concat" insn for "even".
50879 We should have already failed all two instruction sequences. */
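/* I.e. for the "odd" case each operand is viewed as V32HImode and
   shifted right by 8 so the odd bytes land in the low byte of each
   word, both halves are then truncated to V32QImode (vpmovwb) and
   concatenated; the "even" case skips the shifts.  */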
50880
50881 static bool
50882 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
50883 {
50884 rtx t1, t2, t3, t4;
50885 unsigned i, odd, nelt = d->nelt;
50886
50887 if (!TARGET_AVX512BW
50888 || d->one_operand_p
50889 || d->vmode != V64QImode)
50890 return false;
50891
50892 /* Check that permutation is even or odd. */
50893 odd = d->perm[0];
50894 if (odd > 1)
50895 return false;
50896
50897 for (i = 1; i < nelt; ++i)
50898 if (d->perm[i] != 2 * i + odd)
50899 return false;
50900
50901 if (d->testing_p)
50902 return true;
50903
50904
50905 if (odd)
50906 {
50907 t1 = gen_reg_rtx (V32HImode);
50908 t2 = gen_reg_rtx (V32HImode);
50909 emit_insn (gen_lshrv32hi3 (t1,
50910 gen_lowpart (V32HImode, d->op0),
50911 GEN_INT (8)));
50912 emit_insn (gen_lshrv32hi3 (t2,
50913 gen_lowpart (V32HImode, d->op1),
50914 GEN_INT (8)));
50915 }
50916 else
50917 {
50918 t1 = gen_lowpart (V32HImode, d->op0);
50919 t2 = gen_lowpart (V32HImode, d->op1);
50920 }
50921
50922 t3 = gen_reg_rtx (V32QImode);
50923 t4 = gen_reg_rtx (V32QImode);
50924 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
50925 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
50926 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
50927
50928 return true;
50929 }
50930
50931 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
50932 and extract-odd permutations. */
50933
50934 static bool
50935 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
50936 {
50937 rtx t1, t2, t3, t4, t5;
50938
50939 switch (d->vmode)
50940 {
50941 case V4DFmode:
50942 if (d->testing_p)
50943 break;
50944 t1 = gen_reg_rtx (V4DFmode);
50945 t2 = gen_reg_rtx (V4DFmode);
50946
50947 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
50948 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
50949 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
50950
50951 /* Now an unpck[lh]pd will produce the result required. */
50952 if (odd)
50953 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
50954 else
50955 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
50956 emit_insn (t3);
50957 break;
50958
50959 case V8SFmode:
50960 {
50961 int mask = odd ? 0xdd : 0x88;
50962
50963 if (d->testing_p)
50964 break;
50965 t1 = gen_reg_rtx (V8SFmode);
50966 t2 = gen_reg_rtx (V8SFmode);
50967 t3 = gen_reg_rtx (V8SFmode);
50968
50969 /* Shuffle within the 128-bit lanes to produce:
50970 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
50971 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
50972 GEN_INT (mask)));
50973
50974 /* Shuffle the lanes around to produce:
50975 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
50976 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
50977 GEN_INT (0x3)));
50978
50979 /* Shuffle within the 128-bit lanes to produce:
50980 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
50981 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
50982
50983 /* Shuffle within the 128-bit lanes to produce:
50984 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
50985 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
50986
50987 /* Shuffle the lanes around to produce:
50988 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
50989 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
50990 GEN_INT (0x20)));
50991 }
50992 break;
50993
50994 case V2DFmode:
50995 case V4SFmode:
50996 case V2DImode:
50997 case V4SImode:
50998 /* These are always directly implementable by expand_vec_perm_1. */
50999 gcc_unreachable ();
51000
51001 case V8HImode:
51002 if (TARGET_SSE4_1)
51003 return expand_vec_perm_even_odd_pack (d);
51004 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
51005 return expand_vec_perm_pshufb2 (d);
51006 else
51007 {
51008 if (d->testing_p)
51009 break;
51010 /* We need 2*log2(N)-1 operations to achieve odd/even
51011 with interleave. */
51012 t1 = gen_reg_rtx (V8HImode);
51013 t2 = gen_reg_rtx (V8HImode);
51014 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
51015 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
51016 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
51017 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
51018 if (odd)
51019 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
51020 else
51021 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
51022 emit_insn (t3);
51023 }
51024 break;
51025
51026 case V16QImode:
51027 return expand_vec_perm_even_odd_pack (d);
51028
51029 case V16HImode:
51030 case V32QImode:
51031 return expand_vec_perm_even_odd_pack (d);
51032
51033 case V64QImode:
51034 return expand_vec_perm_even_odd_trunc (d);
51035
51036 case V4DImode:
51037 if (!TARGET_AVX2)
51038 {
51039 struct expand_vec_perm_d d_copy = *d;
51040 d_copy.vmode = V4DFmode;
51041 if (d->testing_p)
51042 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
51043 else
51044 d_copy.target = gen_reg_rtx (V4DFmode);
51045 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
51046 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
51047 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51048 {
51049 if (!d->testing_p)
51050 emit_move_insn (d->target,
51051 gen_lowpart (V4DImode, d_copy.target));
51052 return true;
51053 }
51054 return false;
51055 }
51056
51057 if (d->testing_p)
51058 break;
51059
51060 t1 = gen_reg_rtx (V4DImode);
51061 t2 = gen_reg_rtx (V4DImode);
51062
51063 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
51064 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
51065 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
51066
51067 /* Now a vpunpck[lh]qdq will produce the result required. */
51068 if (odd)
51069 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
51070 else
51071 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
51072 emit_insn (t3);
51073 break;
51074
51075 case V8SImode:
51076 if (!TARGET_AVX2)
51077 {
51078 struct expand_vec_perm_d d_copy = *d;
51079 d_copy.vmode = V8SFmode;
51080 if (d->testing_p)
51081 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
51082 else
51083 d_copy.target = gen_reg_rtx (V8SFmode);
51084 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
51085 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
51086 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51087 {
51088 if (!d->testing_p)
51089 emit_move_insn (d->target,
51090 gen_lowpart (V8SImode, d_copy.target));
51091 return true;
51092 }
51093 return false;
51094 }
51095
51096 if (d->testing_p)
51097 break;
51098
51099 t1 = gen_reg_rtx (V8SImode);
51100 t2 = gen_reg_rtx (V8SImode);
51101 t3 = gen_reg_rtx (V4DImode);
51102 t4 = gen_reg_rtx (V4DImode);
51103 t5 = gen_reg_rtx (V4DImode);
51104
51105 /* Shuffle the lanes around into
51106 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
51107 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
51108 gen_lowpart (V4DImode, d->op1),
51109 GEN_INT (0x20)));
51110 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
51111 gen_lowpart (V4DImode, d->op1),
51112 GEN_INT (0x31)));
51113
51114 /* Swap the 2nd and 3rd position in each lane into
51115 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
51116 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
51117 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51118 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
51119 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51120
51121 /* Now a vpunpck[lh]qdq will produce
51122 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
51123 if (odd)
51124 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
51125 gen_lowpart (V4DImode, t2));
51126 else
51127 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
51128 gen_lowpart (V4DImode, t2));
51129 emit_insn (t3);
51130 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
51131 break;
51132
51133 default:
51134 gcc_unreachable ();
51135 }
51136
51137 return true;
51138 }
51139
51140 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51141 extract-even and extract-odd permutations. */
51142
51143 static bool
51144 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
51145 {
51146 unsigned i, odd, nelt = d->nelt;
51147
51148 odd = d->perm[0];
51149 if (odd != 0 && odd != 1)
51150 return false;
51151
51152 for (i = 1; i < nelt; ++i)
51153 if (d->perm[i] != 2 * i + odd)
51154 return false;
51155
51156 return expand_vec_perm_even_odd_1 (d, odd);
51157 }
51158
51159 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
51160 permutations. We assume that expand_vec_perm_1 has already failed. */
51161
51162 static bool
51163 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
51164 {
51165 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
51166 machine_mode vmode = d->vmode;
51167 unsigned char perm2[4];
51168 rtx op0 = d->op0, dest;
51169 bool ok;
51170
51171 switch (vmode)
51172 {
51173 case V4DFmode:
51174 case V8SFmode:
51175 /* These are special-cased in sse.md so that we can optionally
51176 use the vbroadcast instruction. They expand to two insns
51177 if the input happens to be in a register. */
51178 gcc_unreachable ();
51179
51180 case V2DFmode:
51181 case V2DImode:
51182 case V4SFmode:
51183 case V4SImode:
51184 /* These are always implementable using standard shuffle patterns. */
51185 gcc_unreachable ();
51186
51187 case V8HImode:
51188 case V16QImode:
51189 /* These can be implemented via interleave. We save one insn by
51190 stopping once we have promoted to V4SImode and then use pshufd. */
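      /* E.g. to broadcast element 5 of a V8HImode vector: one punpckhwd
	 of op0 with itself yields { 4 4 5 5 6 6 7 7 }, and a pshufd
	 broadcasting dword 1 of that then replicates element 5 into
	 every position.  */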
51191 if (d->testing_p)
51192 return true;
51193 do
51194 {
51195 rtx dest;
51196 rtx (*gen) (rtx, rtx, rtx)
51197 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
51198 : gen_vec_interleave_lowv8hi;
51199
51200 if (elt >= nelt2)
51201 {
51202 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
51203 : gen_vec_interleave_highv8hi;
51204 elt -= nelt2;
51205 }
51206 nelt2 /= 2;
51207
51208 dest = gen_reg_rtx (vmode);
51209 emit_insn (gen (dest, op0, op0));
51210 vmode = get_mode_wider_vector (vmode);
51211 op0 = gen_lowpart (vmode, dest);
51212 }
51213 while (vmode != V4SImode);
51214
51215 memset (perm2, elt, 4);
51216 dest = gen_reg_rtx (V4SImode);
51217 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
51218 gcc_assert (ok);
51219 if (!d->testing_p)
51220 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
51221 return true;
51222
51223 case V64QImode:
51224 case V32QImode:
51225 case V16HImode:
51226 case V8SImode:
51227 case V4DImode:
51228 /* For AVX2 broadcasts of the first element vpbroadcast* or
51229 vpermq should be used by expand_vec_perm_1. */
51230 gcc_assert (!TARGET_AVX2 || d->perm[0]);
51231 return false;
51232
51233 default:
51234 gcc_unreachable ();
51235 }
51236 }
51237
51238 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51239 broadcast permutations. */
51240
51241 static bool
51242 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
51243 {
51244 unsigned i, elt, nelt = d->nelt;
51245
51246 if (!d->one_operand_p)
51247 return false;
51248
51249 elt = d->perm[0];
51250 for (i = 1; i < nelt; ++i)
51251 if (d->perm[i] != elt)
51252 return false;
51253
51254 return expand_vec_perm_broadcast_1 (d);
51255 }
51256
51257 /* Implement arbitrary permutations of two V64QImode operands
51258 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
51259 static bool
51260 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
51261 {
51262 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
51263 return false;
51264
51265 if (d->testing_p)
51266 return true;
51267
51268 struct expand_vec_perm_d ds[2];
51269 rtx rperm[128], vperm, target0, target1;
51270 unsigned int i, nelt;
51271 machine_mode vmode;
51272
51273 nelt = d->nelt;
51274 vmode = V64QImode;
51275
51276 for (i = 0; i < 2; i++)
51277 {
51278 ds[i] = *d;
51279 ds[i].vmode = V32HImode;
51280 ds[i].nelt = 32;
51281 ds[i].target = gen_reg_rtx (V32HImode);
51282 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
51283 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
51284 }
51285
51286 /* Prepare permutations such that the first one takes care of
51287 putting the even bytes into the right positions or one position
51288 higher (ds[0]) and the second one takes care of
51289 putting the odd bytes into the right positions or one position
51290 lower (ds[1]). */
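/* As an illustration with hypothetical values: if d->perm[3] == 7, then
ds[1].perm[1] = 3, so the vpermi2w pass places source word 3 (bytes 6 and 7)
at word position 1 of ds[1].target; the vpshufb mask entry
rperm[3 + 64] = (3 & 14) + (7 & 1) = 3 then selects byte 7 for result
byte 3, while rperm[3] is -1 so the other vpshufb contributes zero there. */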
51291
51292 for (i = 0; i < nelt; i++)
51293 {
51294 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
51295 if (i & 1)
51296 {
51297 rperm[i] = constm1_rtx;
51298 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51299 }
51300 else
51301 {
51302 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51303 rperm[i + 64] = constm1_rtx;
51304 }
51305 }
51306
51307 bool ok = expand_vec_perm_1 (&ds[0]);
51308 gcc_assert (ok);
51309 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
51310
51311 ok = expand_vec_perm_1 (&ds[1]);
51312 gcc_assert (ok);
51313 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
51314
51315 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
51316 vperm = force_reg (vmode, vperm);
51317 target0 = gen_reg_rtx (V64QImode);
51318 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
51319
51320 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
51321 vperm = force_reg (vmode, vperm);
51322 target1 = gen_reg_rtx (V64QImode);
51323 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
51324
51325 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
51326 return true;
51327 }
51328
51329 /* Implement arbitrary permutation of two V32QImode and V16HImode operands
51330 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
51331 all the shorter instruction sequences. */
51332
51333 static bool
51334 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
51335 {
51336 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
51337 unsigned int i, nelt, eltsz;
51338 bool used[4];
51339
51340 if (!TARGET_AVX2
51341 || d->one_operand_p
51342 || (d->vmode != V32QImode && d->vmode != V16HImode))
51343 return false;
51344
51345 if (d->testing_p)
51346 return true;
51347
51348 nelt = d->nelt;
51349 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51350
51351 /* Generate 4 permutation masks. If the required element is within
51352 the same lane, it is shuffled in. If the required element is from the
51353 other lane, force a zero by setting bit 7 in the permutation mask.
51354 The other mask has a non-negative element wherever an element is
51355 requested from the other lane; that element is also moved to the other
51356 lane, so that the result of vpshufb can have its two V2TImode halves
51357 swapped. */
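/* Concretely (illustrative only): mask 0 selects same-lane bytes of op0,
mask 1 cross-lane bytes of op0, mask 2 same-lane bytes of op1 and mask 3
cross-lane bytes of op1; the vpshufb results for masks 1 and 3 are the
ones whose 128-bit halves are swapped by the vpermq below before
everything is combined with vpor. */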
51358 m128 = GEN_INT (-128);
51359 for (i = 0; i < 32; ++i)
51360 {
51361 rperm[0][i] = m128;
51362 rperm[1][i] = m128;
51363 rperm[2][i] = m128;
51364 rperm[3][i] = m128;
51365 }
51366 used[0] = false;
51367 used[1] = false;
51368 used[2] = false;
51369 used[3] = false;
51370 for (i = 0; i < nelt; ++i)
51371 {
51372 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
51373 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
51374 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
51375
51376 for (j = 0; j < eltsz; ++j)
51377 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
51378 used[which] = true;
51379 }
51380
51381 for (i = 0; i < 2; ++i)
51382 {
51383 if (!used[2 * i + 1])
51384 {
51385 h[i] = NULL_RTX;
51386 continue;
51387 }
51388 vperm = gen_rtx_CONST_VECTOR (V32QImode,
51389 gen_rtvec_v (32, rperm[2 * i + 1]));
51390 vperm = force_reg (V32QImode, vperm);
51391 h[i] = gen_reg_rtx (V32QImode);
51392 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51393 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
51394 }
51395
51396 /* Swap the 128-bit lanes of h[X]. */
51397 for (i = 0; i < 2; ++i)
51398 {
51399 if (h[i] == NULL_RTX)
51400 continue;
51401 op = gen_reg_rtx (V4DImode);
51402 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
51403 const2_rtx, GEN_INT (3), const0_rtx,
51404 const1_rtx));
51405 h[i] = gen_lowpart (V32QImode, op);
51406 }
51407
51408 for (i = 0; i < 2; ++i)
51409 {
51410 if (!used[2 * i])
51411 {
51412 l[i] = NULL_RTX;
51413 continue;
51414 }
51415 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
51416 vperm = force_reg (V32QImode, vperm);
51417 l[i] = gen_reg_rtx (V32QImode);
51418 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51419 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
51420 }
51421
51422 for (i = 0; i < 2; ++i)
51423 {
51424 if (h[i] && l[i])
51425 {
51426 op = gen_reg_rtx (V32QImode);
51427 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
51428 l[i] = op;
51429 }
51430 else if (h[i])
51431 l[i] = h[i];
51432 }
51433
51434 gcc_assert (l[0] && l[1]);
51435 op = d->target;
51436 if (d->vmode != V32QImode)
51437 op = gen_reg_rtx (V32QImode);
51438 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
51439 if (op != d->target)
51440 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51441 return true;
51442 }
51443
51444 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
51445 With all of the interface bits taken care of, perform the expansion
51446 in D and return true on success. */
51447
51448 static bool
51449 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
51450 {
51451 /* Try a single instruction expansion. */
51452 if (expand_vec_perm_1 (d))
51453 return true;
51454
51455 /* Try sequences of two instructions. */
51456
51457 if (expand_vec_perm_pshuflw_pshufhw (d))
51458 return true;
51459
51460 if (expand_vec_perm_palignr (d, false))
51461 return true;
51462
51463 if (expand_vec_perm_interleave2 (d))
51464 return true;
51465
51466 if (expand_vec_perm_broadcast (d))
51467 return true;
51468
51469 if (expand_vec_perm_vpermq_perm_1 (d))
51470 return true;
51471
51472 if (expand_vec_perm_vperm2f128 (d))
51473 return true;
51474
51475 if (expand_vec_perm_pblendv (d))
51476 return true;
51477
51478 /* Try sequences of three instructions. */
51479
51480 if (expand_vec_perm_even_odd_pack (d))
51481 return true;
51482
51483 if (expand_vec_perm_2vperm2f128_vshuf (d))
51484 return true;
51485
51486 if (expand_vec_perm_pshufb2 (d))
51487 return true;
51488
51489 if (expand_vec_perm_interleave3 (d))
51490 return true;
51491
51492 if (expand_vec_perm_vperm2f128_vblend (d))
51493 return true;
51494
51495 /* Try sequences of four instructions. */
51496
51497 if (expand_vec_perm_even_odd_trunc (d))
51498 return true;
51499 if (expand_vec_perm_vpshufb2_vpermq (d))
51500 return true;
51501
51502 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
51503 return true;
51504
51505 if (expand_vec_perm_vpermi2_vpshub2 (d))
51506 return true;
51507
51508 /* ??? Look for narrow permutations whose element orderings would
51509 allow the promotion to a wider mode. */
51510
51511 /* ??? Look for sequences of interleave or a wider permute that place
51512 the data into the correct lanes for a half-vector shuffle like
51513 pshuf[lh]w or vpermilps. */
51514
51515 /* ??? Look for sequences of interleave that produce the desired results.
51516 The combinatorics of punpck[lh] get pretty ugly... */
51517
51518 if (expand_vec_perm_even_odd (d))
51519 return true;
51520
51521 /* Even longer sequences. */
51522 if (expand_vec_perm_vpshufb4_vpermq2 (d))
51523 return true;
51524
51525 /* See if we can get the same permutation in different vector integer
51526 mode. */
51527 struct expand_vec_perm_d nd;
51528 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
51529 {
51530 if (!d->testing_p)
51531 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
51532 return true;
51533 }
51534
51535 return false;
51536 }
51537
51538 /* If a permutation only uses one operand, make it clear. Returns true
51539 if the permutation references both operands. */
51540
51541 static bool
51542 canonicalize_perm (struct expand_vec_perm_d *d)
51543 {
51544 int i, which, nelt = d->nelt;
51545
51546 for (i = which = 0; i < nelt; ++i)
51547 which |= (d->perm[i] < nelt ? 1 : 2);
51548
51549 d->one_operand_p = true;
51550 switch (which)
51551 {
51552 default:
51553 gcc_unreachable ();
51554
51555 case 3:
51556 if (!rtx_equal_p (d->op0, d->op1))
51557 {
51558 d->one_operand_p = false;
51559 break;
51560 }
51561 /* The elements of PERM do not suggest that only the first operand
51562 is used, but both operands are identical. Allow easier matching
51563 of the permutation by folding the permutation into the single
51564 input vector. */
51565 /* FALLTHRU */
51566
51567 case 2:
51568 for (i = 0; i < nelt; ++i)
51569 d->perm[i] &= nelt - 1;
51570 d->op0 = d->op1;
51571 break;
51572
51573 case 1:
51574 d->op1 = d->op0;
51575 break;
51576 }
51577
51578 return (which == 3);
51579 }
51580
51581 bool
51582 ix86_expand_vec_perm_const (rtx operands[4])
51583 {
51584 struct expand_vec_perm_d d;
51585 unsigned char perm[MAX_VECT_LEN];
51586 int i, nelt;
51587 bool two_args;
51588 rtx sel;
51589
51590 d.target = operands[0];
51591 d.op0 = operands[1];
51592 d.op1 = operands[2];
51593 sel = operands[3];
51594
51595 d.vmode = GET_MODE (d.target);
51596 gcc_assert (VECTOR_MODE_P (d.vmode));
51597 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51598 d.testing_p = false;
51599
51600 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
51601 gcc_assert (XVECLEN (sel, 0) == nelt);
51602 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
51603
51604 for (i = 0; i < nelt; ++i)
51605 {
51606 rtx e = XVECEXP (sel, 0, i);
51607 int ei = INTVAL (e) & (2 * nelt - 1);
51608 d.perm[i] = ei;
51609 perm[i] = ei;
51610 }
51611
51612 two_args = canonicalize_perm (&d);
51613
51614 if (ix86_expand_vec_perm_const_1 (&d))
51615 return true;
51616
51617 /* If the selector says both arguments are needed, but the operands are the
51618 same, the above tried to expand with one_operand_p and flattened selector.
51619 If that didn't work, retry without one_operand_p; we succeeded with that
51620 during testing. */
51621 if (two_args && d.one_operand_p)
51622 {
51623 d.one_operand_p = false;
51624 memcpy (d.perm, perm, sizeof (perm));
51625 return ix86_expand_vec_perm_const_1 (&d);
51626 }
51627
51628 return false;
51629 }
51630
51631 /* Implement targetm.vectorize.vec_perm_const_ok. */
51632
51633 static bool
51634 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
51635 const unsigned char *sel)
51636 {
51637 struct expand_vec_perm_d d;
51638 unsigned int i, nelt, which;
51639 bool ret;
51640
51641 d.vmode = vmode;
51642 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51643 d.testing_p = true;
51644
51645 /* Given sufficient ISA support we can just return true here
51646 for selected vector modes. */
51647 switch (d.vmode)
51648 {
51649 case V16SFmode:
51650 case V16SImode:
51651 case V8DImode:
51652 case V8DFmode:
51653 if (TARGET_AVX512F)
51654 /* All implementable with a single vpermi2 insn. */
51655 return true;
51656 break;
51657 case V32HImode:
51658 if (TARGET_AVX512BW)
51659 /* All implementable with a single vpermi2 insn. */
51660 return true;
51661 break;
51662 case V64QImode:
51663 if (TARGET_AVX512BW)
51664 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
51665 return true;
51666 break;
51667 case V8SImode:
51668 case V8SFmode:
51669 case V4DFmode:
51670 case V4DImode:
51671 if (TARGET_AVX512VL)
51672 /* All implementable with a single vpermi2 insn. */
51673 return true;
51674 break;
51675 case V16HImode:
51676 if (TARGET_AVX2)
51677 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51678 return true;
51679 break;
51680 case V32QImode:
51681 if (TARGET_AVX2)
51682 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51683 return true;
51684 break;
51685 case V4SImode:
51686 case V4SFmode:
51687 case V8HImode:
51688 case V16QImode:
51689 /* All implementable with a single vpperm insn. */
51690 if (TARGET_XOP)
51691 return true;
51692 /* All implementable with 2 pshufb + 1 ior. */
51693 if (TARGET_SSSE3)
51694 return true;
51695 break;
51696 case V2DImode:
51697 case V2DFmode:
51698 /* All implementable with shufpd or unpck[lh]pd. */
51699 return true;
51700 default:
51701 return false;
51702 }
51703
51704 /* Extract the values from the vector CST into the permutation
51705 array in D. */
51706 memcpy (d.perm, sel, nelt);
51707 for (i = which = 0; i < nelt; ++i)
51708 {
51709 unsigned char e = d.perm[i];
51710 gcc_assert (e < 2 * nelt);
51711 which |= (e < nelt ? 1 : 2);
51712 }
51713
51714 /* For all elements from second vector, fold the elements to first. */
51715 if (which == 2)
51716 for (i = 0; i < nelt; ++i)
51717 d.perm[i] -= nelt;
51718
51719 /* Check whether the mask can be applied to the vector type. */
51720 d.one_operand_p = (which != 3);
51721
51722 /* Implementable with shufps or pshufd. */
51723 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
51724 return true;
51725
51726 /* Otherwise we have to go through the motions and see if we can
51727 figure out how to generate the requested permutation. */
51728 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
51729 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
51730 if (!d.one_operand_p)
51731 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
51732
51733 start_sequence ();
51734 ret = ix86_expand_vec_perm_const_1 (&d);
51735 end_sequence ();
51736
51737 return ret;
51738 }
51739
51740 void
51741 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
51742 {
51743 struct expand_vec_perm_d d;
51744 unsigned i, nelt;
51745
51746 d.target = targ;
51747 d.op0 = op0;
51748 d.op1 = op1;
51749 d.vmode = GET_MODE (targ);
51750 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51751 d.one_operand_p = false;
51752 d.testing_p = false;
51753
51754 for (i = 0; i < nelt; ++i)
51755 d.perm[i] = i * 2 + odd;
51756
51757 /* We'll either be able to implement the permutation directly... */
51758 if (expand_vec_perm_1 (&d))
51759 return;
51760
51761 /* ... or we use the special-case patterns. */
51762 expand_vec_perm_even_odd_1 (&d, odd);
51763 }
51764
51765 static void
51766 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
51767 {
51768 struct expand_vec_perm_d d;
51769 unsigned i, nelt, base;
51770 bool ok;
51771
51772 d.target = targ;
51773 d.op0 = op0;
51774 d.op1 = op1;
51775 d.vmode = GET_MODE (targ);
51776 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51777 d.one_operand_p = false;
51778 d.testing_p = false;
51779
51780 base = high_p ? nelt / 2 : 0;
51781 for (i = 0; i < nelt / 2; ++i)
51782 {
51783 d.perm[i * 2] = i + base;
51784 d.perm[i * 2 + 1] = i + base + nelt;
51785 }
51786
51787 /* Note that for AVX this isn't one instruction. */
51788 ok = ix86_expand_vec_perm_const_1 (&d);
51789 gcc_assert (ok);
51790 }
51791
51792
51793 /* Expand a vector operation CODE for a V*QImode in terms of the
51794 same operation on V*HImode. */
51795
51796 void
51797 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
51798 {
51799 machine_mode qimode = GET_MODE (dest);
51800 machine_mode himode;
51801 rtx (*gen_il) (rtx, rtx, rtx);
51802 rtx (*gen_ih) (rtx, rtx, rtx);
51803 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
51804 struct expand_vec_perm_d d;
51805 bool ok, full_interleave;
51806 bool uns_p = false;
51807 int i;
51808
51809 switch (qimode)
51810 {
51811 case V16QImode:
51812 himode = V8HImode;
51813 gen_il = gen_vec_interleave_lowv16qi;
51814 gen_ih = gen_vec_interleave_highv16qi;
51815 break;
51816 case V32QImode:
51817 himode = V16HImode;
51818 gen_il = gen_avx2_interleave_lowv32qi;
51819 gen_ih = gen_avx2_interleave_highv32qi;
51820 break;
51821 case V64QImode:
51822 himode = V32HImode;
51823 gen_il = gen_avx512bw_interleave_lowv64qi;
51824 gen_ih = gen_avx512bw_interleave_highv64qi;
51825 break;
51826 default:
51827 gcc_unreachable ();
51828 }
51829
51830 op2_l = op2_h = op2;
51831 switch (code)
51832 {
51833 case MULT:
51834 /* Unpack data such that we've got a source byte in each low byte of
51835 each word. We don't care what goes into the high byte of each word.
51836 Rather than trying to get zero in there, most convenient is to let
51837 it be a copy of the low byte. */
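/* For instance (illustrative only), for V16QImode the two interleaves of
op2 with itself give { b0 b0 b1 b1 ... b7 b7 } and { b8 b8 ... b15 b15 };
viewed as V8HImode each word has the source byte in its low byte, and only
the low byte of each word-sized product is kept when merging back. */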
51838 op2_l = gen_reg_rtx (qimode);
51839 op2_h = gen_reg_rtx (qimode);
51840 emit_insn (gen_il (op2_l, op2, op2));
51841 emit_insn (gen_ih (op2_h, op2, op2));
51842 /* FALLTHRU */
51843
51844 op1_l = gen_reg_rtx (qimode);
51845 op1_h = gen_reg_rtx (qimode);
51846 emit_insn (gen_il (op1_l, op1, op1));
51847 emit_insn (gen_ih (op1_h, op1, op1));
51848 full_interleave = qimode == V16QImode;
51849 break;
51850
51851 case ASHIFT:
51852 case LSHIFTRT:
51853 uns_p = true;
51854 /* FALLTHRU */
51855 case ASHIFTRT:
51856 op1_l = gen_reg_rtx (himode);
51857 op1_h = gen_reg_rtx (himode);
51858 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
51859 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
51860 full_interleave = true;
51861 break;
51862 default:
51863 gcc_unreachable ();
51864 }
51865
51866 /* Perform the operation. */
51867 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
51868 1, OPTAB_DIRECT);
51869 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
51870 1, OPTAB_DIRECT);
51871 gcc_assert (res_l && res_h);
51872
51873 /* Merge the data back into the right place. */
51874 d.target = dest;
51875 d.op0 = gen_lowpart (qimode, res_l);
51876 d.op1 = gen_lowpart (qimode, res_h);
51877 d.vmode = qimode;
51878 d.nelt = GET_MODE_NUNITS (qimode);
51879 d.one_operand_p = false;
51880 d.testing_p = false;
51881
51882 if (full_interleave)
51883 {
51884 /* For SSE2, we used a full interleave, so the desired
51885 results are in the even elements. */
51886 for (i = 0; i < 64; ++i)
51887 d.perm[i] = i * 2;
51888 }
51889 else
51890 {
51891 /* For AVX, the interleave used above was not cross-lane. So the
51892 extraction takes the even elements, but with the second and third quarters
51893 swapped. Happily, that is even one insn shorter than even extraction. */
51894 for (i = 0; i < 64; ++i)
51895 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
51896 }
51897
51898 ok = ix86_expand_vec_perm_const_1 (&d);
51899 gcc_assert (ok);
51900
51901 set_unique_reg_note (get_last_insn (), REG_EQUAL,
51902 gen_rtx_fmt_ee (code, qimode, op1, op2));
51903 }
51904
51905 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
51906 if op is CONST_VECTOR with all odd elements equal to their
51907 preceding element. */
51908
51909 static bool
51910 const_vector_equal_evenodd_p (rtx op)
51911 {
51912 machine_mode mode = GET_MODE (op);
51913 int i, nunits = GET_MODE_NUNITS (mode);
51914 if (GET_CODE (op) != CONST_VECTOR
51915 || nunits != CONST_VECTOR_NUNITS (op))
51916 return false;
51917 for (i = 0; i < nunits; i += 2)
51918 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
51919 return false;
51920 return true;
51921 }
51922
51923 void
51924 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
51925 bool uns_p, bool odd_p)
51926 {
51927 machine_mode mode = GET_MODE (op1);
51928 machine_mode wmode = GET_MODE (dest);
51929 rtx x;
51930 rtx orig_op1 = op1, orig_op2 = op2;
51931
51932 if (!nonimmediate_operand (op1, mode))
51933 op1 = force_reg (mode, op1);
51934 if (!nonimmediate_operand (op2, mode))
51935 op2 = force_reg (mode, op2);
51936
51937 /* We only play even/odd games with vectors of SImode. */
51938 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
51939
51940 /* If we're looking for the odd results, shift those members down to
51941 the even slots. For some cpus this is faster than a PSHUFD. */
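/* For example (illustrative only), for V4SImode with a V2DImode
destination, a logical right shift of each DImode element by 32 moves
SImode elements 1 and 3 into the even slots 0 and 2, after which the
even-multiply path below applies unchanged. */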
51942 if (odd_p)
51943 {
51944 /* For XOP use vpmacsdqh, but only for smult, as it is only
51945 signed. */
51946 if (TARGET_XOP && mode == V4SImode && !uns_p)
51947 {
51948 x = force_reg (wmode, CONST0_RTX (wmode));
51949 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
51950 return;
51951 }
51952
51953 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
51954 if (!const_vector_equal_evenodd_p (orig_op1))
51955 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
51956 x, NULL, 1, OPTAB_DIRECT);
51957 if (!const_vector_equal_evenodd_p (orig_op2))
51958 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
51959 x, NULL, 1, OPTAB_DIRECT);
51960 op1 = gen_lowpart (mode, op1);
51961 op2 = gen_lowpart (mode, op2);
51962 }
51963
51964 if (mode == V16SImode)
51965 {
51966 if (uns_p)
51967 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
51968 else
51969 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
51970 }
51971 else if (mode == V8SImode)
51972 {
51973 if (uns_p)
51974 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
51975 else
51976 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
51977 }
51978 else if (uns_p)
51979 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
51980 else if (TARGET_SSE4_1)
51981 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
51982 else
51983 {
51984 rtx s1, s2, t0, t1, t2;
51985
51986 /* The easiest way to implement this without PMULDQ is to go through
51987 the motions as if we are performing a full 64-bit multiply, except
51988 that we need to do less shuffling of the elements. */
51989
51990 /* Compute the sign-extension, aka highparts, of the two operands. */
51991 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
51992 op1, pc_rtx, pc_rtx);
51993 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
51994 op2, pc_rtx, pc_rtx);
51995
51996 /* Multiply LO(A) * HI(B), and vice-versa. */
51997 t1 = gen_reg_rtx (wmode);
51998 t2 = gen_reg_rtx (wmode);
51999 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
52000 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
52001
52002 /* Multiply LO(A) * LO(B). */
52003 t0 = gen_reg_rtx (wmode);
52004 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
52005
52006 /* Combine and shift the highparts into place. */
52007 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
52008 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
52009 1, OPTAB_DIRECT);
52010
52011 /* Combine high and low parts. */
52012 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
52013 return;
52014 }
52015 emit_insn (x);
52016 }
52017
52018 void
52019 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
52020 bool uns_p, bool high_p)
52021 {
52022 machine_mode wmode = GET_MODE (dest);
52023 machine_mode mode = GET_MODE (op1);
52024 rtx t1, t2, t3, t4, mask;
52025
52026 switch (mode)
52027 {
52028 case V4SImode:
52029 t1 = gen_reg_rtx (mode);
52030 t2 = gen_reg_rtx (mode);
52031 if (TARGET_XOP && !uns_p)
52032 {
52033 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
52034 shuffle the elements once so that all elements are in the right
52035 place for immediate use: { A C B D }. */
52036 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
52037 const1_rtx, GEN_INT (3)));
52038 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
52039 const1_rtx, GEN_INT (3)));
52040 }
52041 else
52042 {
52043 /* Put the elements into place for the multiply. */
52044 ix86_expand_vec_interleave (t1, op1, op1, high_p);
52045 ix86_expand_vec_interleave (t2, op2, op2, high_p);
52046 high_p = false;
52047 }
52048 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
52049 break;
52050
52051 case V8SImode:
52052 /* Shuffle the elements between the lanes. After this we
52053 have { A B E F | C D G H } for each operand. */
52054 t1 = gen_reg_rtx (V4DImode);
52055 t2 = gen_reg_rtx (V4DImode);
52056 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
52057 const0_rtx, const2_rtx,
52058 const1_rtx, GEN_INT (3)));
52059 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
52060 const0_rtx, const2_rtx,
52061 const1_rtx, GEN_INT (3)));
52062
52063 /* Shuffle the elements within the lanes. After this we
52064 have { A A B B | C C D D } or { E E F F | G G H H }. */
52065 t3 = gen_reg_rtx (V8SImode);
52066 t4 = gen_reg_rtx (V8SImode);
52067 mask = GEN_INT (high_p
52068 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
52069 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
52070 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
52071 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
52072
52073 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
52074 break;
52075
52076 case V8HImode:
52077 case V16HImode:
52078 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
52079 uns_p, OPTAB_DIRECT);
52080 t2 = expand_binop (mode,
52081 uns_p ? umul_highpart_optab : smul_highpart_optab,
52082 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
52083 gcc_assert (t1 && t2);
52084
52085 t3 = gen_reg_rtx (mode);
52086 ix86_expand_vec_interleave (t3, t1, t2, high_p);
52087 emit_move_insn (dest, gen_lowpart (wmode, t3));
52088 break;
52089
52090 case V16QImode:
52091 case V32QImode:
52092 case V32HImode:
52093 case V16SImode:
52094 case V64QImode:
52095 t1 = gen_reg_rtx (wmode);
52096 t2 = gen_reg_rtx (wmode);
52097 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
52098 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
52099
52100 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
52101 break;
52102
52103 default:
52104 gcc_unreachable ();
52105 }
52106 }
52107
52108 void
52109 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
52110 {
52111 rtx res_1, res_2, res_3, res_4;
52112
52113 res_1 = gen_reg_rtx (V4SImode);
52114 res_2 = gen_reg_rtx (V4SImode);
52115 res_3 = gen_reg_rtx (V2DImode);
52116 res_4 = gen_reg_rtx (V2DImode);
52117 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
52118 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
52119
52120 /* Move the results in element 2 down to element 1; we don't care
52121 what goes in elements 2 and 3. Then we can merge the parts
52122 back together with an interleave.
52123
52124 Note that two other sequences were tried:
52125 (1) Use interleaves at the start instead of psrldq, which allows
52126 us to use a single shufps to merge things back at the end.
52127 (2) Use shufps here to combine the two vectors, then pshufd to
52128 put the elements in the correct order.
52129 In both cases the cost of the reformatting stall was too high
52130 and the overall sequence slower. */
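/* Illustrative data flow, writing p_i for the 64-bit product op1[i] * op2[i]:
res_3 viewed as V4SImode is { lo(p0) hi(p0) lo(p2) hi(p2) } and res_4 is
{ lo(p1) hi(p1) lo(p3) hi(p3) }; the two pshufd insns yield
{ lo(p0) lo(p2) ... } and { lo(p1) lo(p3) ... }, and the final interleave
produces { lo(p0) lo(p1) lo(p2) lo(p3) }. */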
52131
52132 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
52133 const0_rtx, const2_rtx,
52134 const0_rtx, const0_rtx));
52135 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
52136 const0_rtx, const2_rtx,
52137 const0_rtx, const0_rtx));
52138 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
52139
52140 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
52141 }
52142
52143 void
52144 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
52145 {
52146 machine_mode mode = GET_MODE (op0);
52147 rtx t1, t2, t3, t4, t5, t6;
52148
52149 if (TARGET_AVX512DQ && mode == V8DImode)
52150 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
52151 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
52152 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
52153 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
52154 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
52155 else if (TARGET_XOP && mode == V2DImode)
52156 {
52157 /* op1: A,B,C,D, op2: E,F,G,H */
52158 op1 = gen_lowpart (V4SImode, op1);
52159 op2 = gen_lowpart (V4SImode, op2);
52160
52161 t1 = gen_reg_rtx (V4SImode);
52162 t2 = gen_reg_rtx (V4SImode);
52163 t3 = gen_reg_rtx (V2DImode);
52164 t4 = gen_reg_rtx (V2DImode);
52165
52166 /* t1: B,A,D,C */
52167 emit_insn (gen_sse2_pshufd_1 (t1, op1,
52168 GEN_INT (1),
52169 GEN_INT (0),
52170 GEN_INT (3),
52171 GEN_INT (2)));
52172
52173 /* t2: (B*E),(A*F),(D*G),(C*H) */
52174 emit_insn (gen_mulv4si3 (t2, t1, op2));
52175
52176 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
52177 emit_insn (gen_xop_phadddq (t3, t2));
52178
52179 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
52180 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
52181
52182 /* Multiply lower parts and add all */
52183 t5 = gen_reg_rtx (V2DImode);
52184 emit_insn (gen_vec_widen_umult_even_v4si (t5,
52185 gen_lowpart (V4SImode, op1),
52186 gen_lowpart (V4SImode, op2)));
52187 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
52188
52189 }
52190 else
52191 {
52192 machine_mode nmode;
52193 rtx (*umul) (rtx, rtx, rtx);
52194
52195 if (mode == V2DImode)
52196 {
52197 umul = gen_vec_widen_umult_even_v4si;
52198 nmode = V4SImode;
52199 }
52200 else if (mode == V4DImode)
52201 {
52202 umul = gen_vec_widen_umult_even_v8si;
52203 nmode = V8SImode;
52204 }
52205 else if (mode == V8DImode)
52206 {
52207 umul = gen_vec_widen_umult_even_v16si;
52208 nmode = V16SImode;
52209 }
52210 else
52211 gcc_unreachable ();
52212
52213
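/* The identity used below: with each 64-bit element split as
hi * 2^32 + lo, the low 64 bits of the product are
lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32). */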
52214 /* Multiply low parts. */
52215 t1 = gen_reg_rtx (mode);
52216 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
52217
52218 /* Shift input vectors right 32 bits so we can multiply high parts. */
52219 t6 = GEN_INT (32);
52220 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
52221 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
52222
52223 /* Multiply high parts by low parts. */
52224 t4 = gen_reg_rtx (mode);
52225 t5 = gen_reg_rtx (mode);
52226 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
52227 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
52228
52229 /* Combine and shift the highparts back. */
52230 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
52231 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
52232
52233 /* Combine high and low parts. */
52234 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
52235 }
52236
52237 set_unique_reg_note (get_last_insn (), REG_EQUAL,
52238 gen_rtx_MULT (mode, op1, op2));
52239 }
52240
52241 /* Return 1 if control transfer instruction INSN
52242 should be encoded with the bnd prefix.
52243 If insn is NULL then return 1 when control
52244 transfer instructions should be prefixed with
52245 bnd by default for the current function. */
52246
52247 bool
52248 ix86_bnd_prefixed_insn_p (rtx insn)
52249 {
52250 /* For call insns check special flag. */
52251 if (insn && CALL_P (insn))
52252 {
52253 rtx call = get_call_rtx_from (insn);
52254 if (call)
52255 return CALL_EXPR_WITH_BOUNDS_P (call);
52256 }
52257
52258 /* All other insns are prefixed only if function is instrumented. */
52259 return chkp_function_instrumented_p (current_function_decl);
52260 }
52261
52262 /* Calculate integer abs() using only SSE2 instructions. */
52263
52264 void
52265 ix86_expand_sse2_abs (rtx target, rtx input)
52266 {
52267 machine_mode mode = GET_MODE (target);
52268 rtx tmp0, tmp1, x;
52269
52270 switch (mode)
52271 {
52272 /* For 32-bit signed integer X, the best way to calculate the absolute
52273 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
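/* For example, with X = -5: X >> 31 = -1 and ((-5) ^ (-1)) - (-1)
= 4 + 1 = 5. */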
52274 case V4SImode:
52275 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
52276 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
52277 NULL, 0, OPTAB_DIRECT);
52278 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
52279 NULL, 0, OPTAB_DIRECT);
52280 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
52281 target, 0, OPTAB_DIRECT);
52282 break;
52283
52284 /* For 16-bit signed integer X, the best way to calculate the absolute
52285 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
52286 case V8HImode:
52287 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52288
52289 x = expand_simple_binop (mode, SMAX, tmp0, input,
52290 target, 0, OPTAB_DIRECT);
52291 break;
52292
52293 /* For 8-bit signed integer X, the best way to calculate the absolute
52294 value of X is min ((unsigned char) X, (unsigned char) (-X)),
52295 as SSE2 provides the PMINUB insn. */
52296 case V16QImode:
52297 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52298
52299 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
52300 target, 0, OPTAB_DIRECT);
52301 break;
52302
52303 default:
52304 gcc_unreachable ();
52305 }
52306
52307 if (x != target)
52308 emit_move_insn (target, x);
52309 }
52310
52311 /* Expand an extract from a vector register through pextr insn.
52312 Return true if successful. */
52313
52314 bool
52315 ix86_expand_pextr (rtx *operands)
52316 {
52317 rtx dst = operands[0];
52318 rtx src = operands[1];
52319
52320 unsigned int size = INTVAL (operands[2]);
52321 unsigned int pos = INTVAL (operands[3]);
52322
52323 if (SUBREG_P (dst))
52324 {
52325 /* Reject non-lowpart subregs. */
52326 if (SUBREG_BYTE (dst) > 0)
52327 return false;
52328 dst = SUBREG_REG (dst);
52329 }
52330
52331 if (SUBREG_P (src))
52332 {
52333 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
52334 src = SUBREG_REG (src);
52335 }
52336
52337 switch (GET_MODE (src))
52338 {
52339 case V16QImode:
52340 case V8HImode:
52341 case V4SImode:
52342 case V2DImode:
52343 case V1TImode:
52344 case TImode:
52345 {
52346 machine_mode srcmode, dstmode;
52347 rtx d, pat;
52348
52349 dstmode = mode_for_size (size, MODE_INT, 0);
52350
52351 switch (dstmode)
52352 {
52353 case QImode:
52354 if (!TARGET_SSE4_1)
52355 return false;
52356 srcmode = V16QImode;
52357 break;
52358
52359 case HImode:
52360 if (!TARGET_SSE2)
52361 return false;
52362 srcmode = V8HImode;
52363 break;
52364
52365 case SImode:
52366 if (!TARGET_SSE4_1)
52367 return false;
52368 srcmode = V4SImode;
52369 break;
52370
52371 case DImode:
52372 gcc_assert (TARGET_64BIT);
52373 if (!TARGET_SSE4_1)
52374 return false;
52375 srcmode = V2DImode;
52376 break;
52377
52378 default:
52379 return false;
52380 }
52381
52382 /* Reject extractions from misaligned positions. */
52383 if (pos & (size-1))
52384 return false;
52385
52386 if (GET_MODE (dst) == dstmode)
52387 d = dst;
52388 else
52389 d = gen_reg_rtx (dstmode);
52390
52391 /* Construct insn pattern. */
52392 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
52393 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
52394
52395 /* Let the rtl optimizers know about the zero extension performed. */
52396 if (dstmode == QImode || dstmode == HImode)
52397 {
52398 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
52399 d = gen_lowpart (SImode, d);
52400 }
52401
52402 emit_insn (gen_rtx_SET (d, pat));
52403
52404 if (d != dst)
52405 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52406 return true;
52407 }
52408
52409 default:
52410 return false;
52411 }
52412 }
52413
52414 /* Expand an insert into a vector register through pinsr insn.
52415 Return true if successful. */
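/* For example (illustrative only): inserting a 16-bit value at bit
position 32 of a TImode destination goes through the V8HImode view and
gen_sse2_pinsrw with the element-selection mask GEN_INT (1 << (32 / 16)),
i.e. element 2 is replaced. */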
52416
52417 bool
52418 ix86_expand_pinsr (rtx *operands)
52419 {
52420 rtx dst = operands[0];
52421 rtx src = operands[3];
52422
52423 unsigned int size = INTVAL (operands[1]);
52424 unsigned int pos = INTVAL (operands[2]);
52425
52426 if (SUBREG_P (dst))
52427 {
52428 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
52429 dst = SUBREG_REG (dst);
52430 }
52431
52432 switch (GET_MODE (dst))
52433 {
52434 case V16QImode:
52435 case V8HImode:
52436 case V4SImode:
52437 case V2DImode:
52438 case V1TImode:
52439 case TImode:
52440 {
52441 machine_mode srcmode, dstmode;
52442 rtx (*pinsr)(rtx, rtx, rtx, rtx);
52443 rtx d;
52444
52445 srcmode = mode_for_size (size, MODE_INT, 0);
52446
52447 switch (srcmode)
52448 {
52449 case QImode:
52450 if (!TARGET_SSE4_1)
52451 return false;
52452 dstmode = V16QImode;
52453 pinsr = gen_sse4_1_pinsrb;
52454 break;
52455
52456 case HImode:
52457 if (!TARGET_SSE2)
52458 return false;
52459 dstmode = V8HImode;
52460 pinsr = gen_sse2_pinsrw;
52461 break;
52462
52463 case SImode:
52464 if (!TARGET_SSE4_1)
52465 return false;
52466 dstmode = V4SImode;
52467 pinsr = gen_sse4_1_pinsrd;
52468 break;
52469
52470 case DImode:
52471 gcc_assert (TARGET_64BIT);
52472 if (!TARGET_SSE4_1)
52473 return false;
52474 dstmode = V2DImode;
52475 pinsr = gen_sse4_1_pinsrq;
52476 break;
52477
52478 default:
52479 return false;
52480 }
52481
52482 /* Reject insertions to misaligned positions. */
52483 if (pos & (size-1))
52484 return false;
52485
52486 if (SUBREG_P (src))
52487 {
52488 unsigned int srcpos = SUBREG_BYTE (src);
52489
52490 if (srcpos > 0)
52491 {
52492 rtx extr_ops[4];
52493
52494 extr_ops[0] = gen_reg_rtx (srcmode);
52495 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
52496 extr_ops[2] = GEN_INT (size);
52497 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
52498
52499 if (!ix86_expand_pextr (extr_ops))
52500 return false;
52501
52502 src = extr_ops[0];
52503 }
52504 else
52505 src = gen_lowpart (srcmode, SUBREG_REG (src));
52506 }
52507
52508 if (GET_MODE (dst) == dstmode)
52509 d = dst;
52510 else
52511 d = gen_reg_rtx (dstmode);
52512
52513 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
52514 gen_lowpart (srcmode, src),
52515 GEN_INT (1 << (pos / size))));
52516 if (d != dst)
52517 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52518 return true;
52519 }
52520
52521 default:
52522 return false;
52523 }
52524 }
52525 \f
52526 /* This function returns the calling-ABI-specific va_list type node.
52527 It returns the FNDECL-specific va_list type. */
52528
52529 static tree
52530 ix86_fn_abi_va_list (tree fndecl)
52531 {
52532 if (!TARGET_64BIT)
52533 return va_list_type_node;
52534 gcc_assert (fndecl != NULL_TREE);
52535
52536 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
52537 return ms_va_list_type_node;
52538 else
52539 return sysv_va_list_type_node;
52540 }
52541
52542 /* Returns the canonical va_list type specified by TYPE. If there
52543 is no valid TYPE provided, it returns NULL_TREE. */
52544
52545 static tree
52546 ix86_canonical_va_list_type (tree type)
52547 {
52548 tree wtype, htype;
52549
52550 /* Resolve references and pointers to va_list type. */
52551 if (TREE_CODE (type) == MEM_REF)
52552 type = TREE_TYPE (type);
52553 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
52554 type = TREE_TYPE (type);
52555 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
52556 type = TREE_TYPE (type);
52557
52558 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
52559 {
52560 wtype = va_list_type_node;
52561 gcc_assert (wtype != NULL_TREE);
52562 htype = type;
52563 if (TREE_CODE (wtype) == ARRAY_TYPE)
52564 {
52565 /* If va_list is an array type, the argument may have decayed
52566 to a pointer type, e.g. by being passed to another function.
52567 In that case, unwrap both types so that we can compare the
52568 underlying records. */
52569 if (TREE_CODE (htype) == ARRAY_TYPE
52570 || POINTER_TYPE_P (htype))
52571 {
52572 wtype = TREE_TYPE (wtype);
52573 htype = TREE_TYPE (htype);
52574 }
52575 }
52576 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52577 return va_list_type_node;
52578 wtype = sysv_va_list_type_node;
52579 gcc_assert (wtype != NULL_TREE);
52580 htype = type;
52581 if (TREE_CODE (wtype) == ARRAY_TYPE)
52582 {
52583 /* If va_list is an array type, the argument may have decayed
52584 to a pointer type, e.g. by being passed to another function.
52585 In that case, unwrap both types so that we can compare the
52586 underlying records. */
52587 if (TREE_CODE (htype) == ARRAY_TYPE
52588 || POINTER_TYPE_P (htype))
52589 {
52590 wtype = TREE_TYPE (wtype);
52591 htype = TREE_TYPE (htype);
52592 }
52593 }
52594 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52595 return sysv_va_list_type_node;
52596 wtype = ms_va_list_type_node;
52597 gcc_assert (wtype != NULL_TREE);
52598 htype = type;
52599 if (TREE_CODE (wtype) == ARRAY_TYPE)
52600 {
52601 /* If va_list is an array type, the argument may have decayed
52602 to a pointer type, e.g. by being passed to another function.
52603 In that case, unwrap both types so that we can compare the
52604 underlying records. */
52605 if (TREE_CODE (htype) == ARRAY_TYPE
52606 || POINTER_TYPE_P (htype))
52607 {
52608 wtype = TREE_TYPE (wtype);
52609 htype = TREE_TYPE (htype);
52610 }
52611 }
52612 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52613 return ms_va_list_type_node;
52614 return NULL_TREE;
52615 }
52616 return std_canonical_va_list_type (type);
52617 }
52618
52619 /* Iterate through the target-specific builtin types for va_list.
52620 IDX denotes the iterator, *PTREE is set to the result type of
52621 the va_list builtin, and *PNAME to its internal type.
52622 Returns zero if there is no element for this index, otherwise
52623 IDX should be increased upon the next call.
52624 Note, do not iterate a base builtin's name like __builtin_va_list.
52625 Used from c_common_nodes_and_builtins. */
52626
52627 static int
52628 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
52629 {
52630 if (TARGET_64BIT)
52631 {
52632 switch (idx)
52633 {
52634 default:
52635 break;
52636
52637 case 0:
52638 *ptree = ms_va_list_type_node;
52639 *pname = "__builtin_ms_va_list";
52640 return 1;
52641
52642 case 1:
52643 *ptree = sysv_va_list_type_node;
52644 *pname = "__builtin_sysv_va_list";
52645 return 1;
52646 }
52647 }
52648
52649 return 0;
52650 }
52651
52652 #undef TARGET_SCHED_DISPATCH
52653 #define TARGET_SCHED_DISPATCH has_dispatch
52654 #undef TARGET_SCHED_DISPATCH_DO
52655 #define TARGET_SCHED_DISPATCH_DO do_dispatch
52656 #undef TARGET_SCHED_REASSOCIATION_WIDTH
52657 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
52658 #undef TARGET_SCHED_REORDER
52659 #define TARGET_SCHED_REORDER ix86_sched_reorder
52660 #undef TARGET_SCHED_ADJUST_PRIORITY
52661 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
52662 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
52663 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
52664 ix86_dependencies_evaluation_hook
52665
52666 /* The size of the dispatch window is the total number of bytes of
52667 object code allowed in a window. */
52668 #define DISPATCH_WINDOW_SIZE 16
52669
52670 /* Number of dispatch windows considered for scheduling. */
52671 #define MAX_DISPATCH_WINDOWS 3
52672
52673 /* Maximum number of instructions in a window. */
52674 #define MAX_INSN 4
52675
52676 /* Maximum number of immediate operands in a window. */
52677 #define MAX_IMM 4
52678
52679 /* Maximum number of immediate bits allowed in a window. */
52680 #define MAX_IMM_SIZE 128
52681
52682 /* Maximum number of 32 bit immediates allowed in a window. */
52683 #define MAX_IMM_32 4
52684
52685 /* Maximum number of 64 bit immediates allowed in a window. */
52686 #define MAX_IMM_64 2
52687
52688 /* Maximum total of loads or prefetches allowed in a window. */
52689 #define MAX_LOAD 2
52690
52691 /* Maximum total of stores allowed in a window. */
52692 #define MAX_STORE 1
52693
52694 #undef BIG
52695 #define BIG 100
52696
52697
52698 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
52699 enum dispatch_group {
52700 disp_no_group = 0,
52701 disp_load,
52702 disp_store,
52703 disp_load_store,
52704 disp_prefetch,
52705 disp_imm,
52706 disp_imm_32,
52707 disp_imm_64,
52708 disp_branch,
52709 disp_cmp,
52710 disp_jcc,
52711 disp_last
52712 };
52713
52714 /* Number of allowable groups in a dispatch window. It is an array
52715 indexed by the dispatch_group enum. 100 is used as a big number,
52716 because the number of these kinds of operations does not have any
52717 effect in the dispatch window, but we need them for other reasons in
52718 the table. */
52719 static unsigned int num_allowable_groups[disp_last] = {
52720 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
52721 };
52722
52723 char group_name[disp_last + 1][16] = {
52724 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
52725 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
52726 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
52727 };
52728
52729 /* Instruction path. */
52730 enum insn_path {
52731 no_path = 0,
52732 path_single, /* Single micro op. */
52733 path_double, /* Double micro op. */
52734 path_multi, /* Instructions with more than 2 micro ops. */
52735 last_path
52736 };
52737
52738 /* sched_insn_info describes one slot in the window of instructions
52739 scheduled in the basic block. It records the scheduled instruction
52740 together with its dispatch group, path, and size information.
52741
52742 Windows are allocated for each basic block and are linked
52743 together. */
52744 typedef struct sched_insn_info_s {
52745 rtx insn;
52746 enum dispatch_group group;
52747 enum insn_path path;
52748 int byte_len;
52749 int imm_bytes;
52750 } sched_insn_info;
52751
52752 /* Linked list of dispatch windows. This is a two way list of
52753 dispatch windows of a basic block. It contains information about
52754 the number of uops in the window and the total number of
52755 instructions and of bytes in the object code for this dispatch
52756 window. */
52757 typedef struct dispatch_windows_s {
52758 int num_insn; /* Number of insns in the window. */
52759 int num_uops; /* Number of uops in the window. */
52760 int window_size; /* Number of bytes in the window. */
52761 int window_num; /* Window number, either 0 or 1. */
52762 int num_imm; /* Number of immediates in an insn. */
52763 int num_imm_32; /* Number of 32 bit immediates in an insn. */
52764 int num_imm_64; /* Number of 64 bit immediates in an insn. */
52765 int imm_size; /* Total immediates in the window. */
52766 int num_loads; /* Total memory loads in the window. */
52767 int num_stores; /* Total memory stores in the window. */
52768 int violation; /* Violation exists in window. */
52769 sched_insn_info *window; /* Pointer to the window. */
52770 struct dispatch_windows_s *next;
52771 struct dispatch_windows_s *prev;
52772 } dispatch_windows;
52773
52774 /* Immediate values used in an insn. */
52775 typedef struct imm_info_s
52776 {
52777 int imm;
52778 int imm32;
52779 int imm64;
52780 } imm_info;
52781
52782 static dispatch_windows *dispatch_window_list;
52783 static dispatch_windows *dispatch_window_list1;
52784
52785 /* Get dispatch group of insn. */
52786
52787 static enum dispatch_group
52788 get_mem_group (rtx_insn *insn)
52789 {
52790 enum attr_memory memory;
52791
52792 if (INSN_CODE (insn) < 0)
52793 return disp_no_group;
52794 memory = get_attr_memory (insn);
52795 if (memory == MEMORY_STORE)
52796 return disp_store;
52797
52798 if (memory == MEMORY_LOAD)
52799 return disp_load;
52800
52801 if (memory == MEMORY_BOTH)
52802 return disp_load_store;
52803
52804 return disp_no_group;
52805 }
52806
52807 /* Return true if insn is a compare instruction. */
52808
52809 static bool
52810 is_cmp (rtx_insn *insn)
52811 {
52812 enum attr_type type;
52813
52814 type = get_attr_type (insn);
52815 return (type == TYPE_TEST
52816 || type == TYPE_ICMP
52817 || type == TYPE_FCMP
52818 || GET_CODE (PATTERN (insn)) == COMPARE);
52819 }
52820
52821 /* Return true if a dispatch violation was encountered. */
52822
52823 static bool
52824 dispatch_violation (void)
52825 {
52826 if (dispatch_window_list->next)
52827 return dispatch_window_list->next->violation;
52828 return dispatch_window_list->violation;
52829 }
52830
52831 /* Return true if insn is a branch instruction. */
52832
52833 static bool
52834 is_branch (rtx_insn *insn)
52835 {
52836 return (CALL_P (insn) || JUMP_P (insn));
52837 }
52838
52839 /* Return true if insn is a prefetch instruction. */
52840
52841 static bool
52842 is_prefetch (rtx_insn *insn)
52843 {
52844 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
52845 }
52846
52847 /* This function initializes a dispatch window and the list container holding a
52848 pointer to the window. */
52849
52850 static void
52851 init_window (int window_num)
52852 {
52853 int i;
52854 dispatch_windows *new_list;
52855
52856 if (window_num == 0)
52857 new_list = dispatch_window_list;
52858 else
52859 new_list = dispatch_window_list1;
52860
52861 new_list->num_insn = 0;
52862 new_list->num_uops = 0;
52863 new_list->window_size = 0;
52864 new_list->next = NULL;
52865 new_list->prev = NULL;
52866 new_list->window_num = window_num;
52867 new_list->num_imm = 0;
52868 new_list->num_imm_32 = 0;
52869 new_list->num_imm_64 = 0;
52870 new_list->imm_size = 0;
52871 new_list->num_loads = 0;
52872 new_list->num_stores = 0;
52873 new_list->violation = false;
52874
52875 for (i = 0; i < MAX_INSN; i++)
52876 {
52877 new_list->window[i].insn = NULL;
52878 new_list->window[i].group = disp_no_group;
52879 new_list->window[i].path = no_path;
52880 new_list->window[i].byte_len = 0;
52881 new_list->window[i].imm_bytes = 0;
52882 }
52883 return;
52884 }
52885
52886 /* This function allocates and initializes a dispatch window and the
52887 list container holding a pointer to the window. */
52888
52889 static dispatch_windows *
52890 allocate_window (void)
52891 {
52892 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
52893 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
52894
52895 return new_list;
52896 }
52897
52898 /* This routine initializes the dispatch scheduling information. It
52899 initiates building dispatch scheduler tables and constructs the
52900 first dispatch window. */
52901
52902 static void
52903 init_dispatch_sched (void)
52904 {
52905 /* Allocate a dispatch list and a window. */
52906 dispatch_window_list = allocate_window ();
52907 dispatch_window_list1 = allocate_window ();
52908 init_window (0);
52909 init_window (1);
52910 }
52911
52912 /* This function returns true if a branch is detected. End of a basic block
52913 does not have to be a branch, but here we assume only branches end a
52914 window. */
52915
52916 static bool
52917 is_end_basic_block (enum dispatch_group group)
52918 {
52919 return group == disp_branch;
52920 }
52921
52922 /* This function is called when the end of a window processing is reached. */
52923
52924 static void
52925 process_end_window (void)
52926 {
52927 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
52928 if (dispatch_window_list->next)
52929 {
52930 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
52931 gcc_assert (dispatch_window_list->window_size
52932 + dispatch_window_list1->window_size <= 48);
52933 init_window (1);
52934 }
52935 init_window (0);
52936 }
52937
52938 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
52939 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
52940 for 48 bytes of instructions. Note that these windows are not dispatch
52941 windows whose sizes are DISPATCH_WINDOW_SIZE. */
52942
52943 static dispatch_windows *
52944 allocate_next_window (int window_num)
52945 {
52946 if (window_num == 0)
52947 {
52948 if (dispatch_window_list->next)
52949 init_window (1);
52950 init_window (0);
52951 return dispatch_window_list;
52952 }
52953
52954 dispatch_window_list->next = dispatch_window_list1;
52955 dispatch_window_list1->prev = dispatch_window_list;
52956
52957 return dispatch_window_list1;
52958 }
52959
52960 /* Compute number of immediate operands of an instruction. */
52961
52962 static void
52963 find_constant (rtx in_rtx, imm_info *imm_values)
52964 {
52965 if (INSN_P (in_rtx))
52966 in_rtx = PATTERN (in_rtx);
52967 subrtx_iterator::array_type array;
52968 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
52969 if (const_rtx x = *iter)
52970 switch (GET_CODE (x))
52971 {
52972 case CONST:
52973 case SYMBOL_REF:
52974 case CONST_INT:
52975 (imm_values->imm)++;
52976 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
52977 (imm_values->imm32)++;
52978 else
52979 (imm_values->imm64)++;
52980 break;
52981
52982 case CONST_DOUBLE:
52983 case CONST_WIDE_INT:
52984 (imm_values->imm)++;
52985 (imm_values->imm64)++;
52986 break;
52987
52988 case CODE_LABEL:
52989 if (LABEL_KIND (x) == LABEL_NORMAL)
52990 {
52991 (imm_values->imm)++;
52992 (imm_values->imm32)++;
52993 }
52994 break;
52995
52996 default:
52997 break;
52998 }
52999 }
53000
53001 /* Return total size of immediate operands of an instruction along with number
53002 of corresponding immediate-operands. It initializes its parameters to zero
53003 before calling FIND_CONSTANT.
53004 INSN is the input instruction. IMM is the total of immediates.
53005 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
53006 bit immediates. */
53007
53008 static int
53009 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
53010 {
53011 imm_info imm_values = {0, 0, 0};
53012
53013 find_constant (insn, &imm_values);
53014 *imm = imm_values.imm;
53015 *imm32 = imm_values.imm32;
53016 *imm64 = imm_values.imm64;
53017 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
53018 }
53019
53020 /* This function indicates whether an instruction has an immediate
53021 operand. */
53022
53023 static bool
53024 has_immediate (rtx_insn *insn)
53025 {
53026 int num_imm_operand;
53027 int num_imm32_operand;
53028 int num_imm64_operand;
53029
53030 if (insn)
53031 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53032 &num_imm64_operand);
53033 return false;
53034 }
53035
53036 /* Return single or double path for instructions. */
53037
53038 static enum insn_path
53039 get_insn_path (rtx_insn *insn)
53040 {
53041 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
53042
53043 if ((int)path == 0)
53044 return path_single;
53045
53046 if ((int)path == 1)
53047 return path_double;
53048
53049 return path_multi;
53050 }
53051
53052 /* Return insn dispatch group. */
53053
53054 static enum dispatch_group
53055 get_insn_group (rtx_insn *insn)
53056 {
53057 enum dispatch_group group = get_mem_group (insn);
53058 if (group)
53059 return group;
53060
53061 if (is_branch (insn))
53062 return disp_branch;
53063
53064 if (is_cmp (insn))
53065 return disp_cmp;
53066
53067 if (has_immediate (insn))
53068 return disp_imm;
53069
53070 if (is_prefetch (insn))
53071 return disp_prefetch;
53072
53073 return disp_no_group;
53074 }
53075
53076 /* Count number of GROUP restricted instructions in a dispatch
53077 window WINDOW_LIST. */
53078
53079 static int
53080 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
53081 {
53082 enum dispatch_group group = get_insn_group (insn);
53083 int imm_size;
53084 int num_imm_operand;
53085 int num_imm32_operand;
53086 int num_imm64_operand;
53087
53088 if (group == disp_no_group)
53089 return 0;
53090
53091 if (group == disp_imm)
53092 {
53093 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53094 &num_imm64_operand);
53095 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
53096 || num_imm_operand + window_list->num_imm > MAX_IMM
53097 || (num_imm32_operand > 0
53098 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
53099 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
53100 || (num_imm64_operand > 0
53101 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
53102 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
53103 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
53104 && num_imm64_operand > 0
53105 && ((window_list->num_imm_64 > 0
53106 && window_list->num_insn >= 2)
53107 || window_list->num_insn >= 3)))
53108 return BIG;
53109
53110 return 1;
53111 }
53112
53113 if ((group == disp_load_store
53114 && (window_list->num_loads >= MAX_LOAD
53115 || window_list->num_stores >= MAX_STORE))
53116 || ((group == disp_load
53117 || group == disp_prefetch)
53118 && window_list->num_loads >= MAX_LOAD)
53119 || (group == disp_store
53120 && window_list->num_stores >= MAX_STORE))
53121 return BIG;
53122
53123 return 1;
53124 }
53125
53126 /* This function returns true if insn satisfies dispatch rules on the
53127 last window scheduled. */
53128
53129 static bool
53130 fits_dispatch_window (rtx_insn *insn)
53131 {
53132 dispatch_windows *window_list = dispatch_window_list;
53133 dispatch_windows *window_list_next = dispatch_window_list->next;
53134 unsigned int num_restrict;
53135 enum dispatch_group group = get_insn_group (insn);
53136 enum insn_path path = get_insn_path (insn);
53137 int sum;
53138
53139 /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
53140 instructions should be given the lowest priority in the
53141 scheduling process in the Haifa scheduler to make sure they will be
53142 scheduled in the same dispatch window as the reference to them. */
53143 if (group == disp_jcc || group == disp_cmp)
53144 return false;
53145
53146 /* Check nonrestricted. */
53147 if (group == disp_no_group || group == disp_branch)
53148 return true;
53149
53150 /* Get last dispatch window. */
53151 if (window_list_next)
53152 window_list = window_list_next;
53153
53154 if (window_list->window_num == 1)
53155 {
53156 sum = window_list->prev->window_size + window_list->window_size;
53157
53158 if (sum == 32
53159 || (min_insn_size (insn) + sum) >= 48)
53160 /* Window 1 is full. Go for next window. */
53161 return true;
53162 }
53163
53164 num_restrict = count_num_restricted (insn, window_list);
53165
53166 if (num_restrict > num_allowable_groups[group])
53167 return false;
53168
53169 /* See if it fits in the first window. */
53170 if (window_list->window_num == 0)
53171 {
53172 /* The first window should have only single- and double-path
53173 uops. */
53174 if (path == path_double
53175 && (window_list->num_uops + 2) > MAX_INSN)
53176 return false;
53177 else if (path != path_single)
53178 return false;
53179 }
53180 return true;
53181 }
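
/* A worked example of the window-1 check above (sizes are hypothetical):
   if window 0 already holds 16 bytes and window 1 holds 16 bytes, SUM is
   32, so the function returns true without applying the per-group
   restrictions -- the instruction will simply start a new dispatch
   window.  Likewise, with SUM == 40 an 8-byte instruction satisfies
   8 + 40 >= 48 and takes the same path.  */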
53182
53183 /* Add an instruction INSN with NUM_UOPS micro-operations to the
53184 dispatch window WINDOW_LIST. */
53185
53186 static void
53187 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
53188 {
53189 int byte_len = min_insn_size (insn);
53190 int num_insn = window_list->num_insn;
53191 int imm_size;
53192 sched_insn_info *window = window_list->window;
53193 enum dispatch_group group = get_insn_group (insn);
53194 enum insn_path path = get_insn_path (insn);
53195 int num_imm_operand;
53196 int num_imm32_operand;
53197 int num_imm64_operand;
53198
53199 if (!window_list->violation && group != disp_cmp
53200 && !fits_dispatch_window (insn))
53201 window_list->violation = true;
53202
53203 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53204 &num_imm64_operand);
53205
53206 /* Initialize window with new instruction. */
53207 window[num_insn].insn = insn;
53208 window[num_insn].byte_len = byte_len;
53209 window[num_insn].group = group;
53210 window[num_insn].path = path;
53211 window[num_insn].imm_bytes = imm_size;
53212
53213 window_list->window_size += byte_len;
53214 window_list->num_insn = num_insn + 1;
53215 window_list->num_uops = window_list->num_uops + num_uops;
53216 window_list->imm_size += imm_size;
53217 window_list->num_imm += num_imm_operand;
53218 window_list->num_imm_32 += num_imm32_operand;
53219 window_list->num_imm_64 += num_imm64_operand;
53220
53221 if (group == disp_store)
53222 window_list->num_stores += 1;
53223 else if (group == disp_load
53224 || group == disp_prefetch)
53225 window_list->num_loads += 1;
53226 else if (group == disp_load_store)
53227 {
53228 window_list->num_stores += 1;
53229 window_list->num_loads += 1;
53230 }
53231 }
53232
53233 /* Adds a scheduled instruction, INSN, to the current dispatch window.
53234    If the total byte length or the number of instructions in the
53235    window exceeds the allowed maximum, it allocates a new window.  */
53236
53237 static void
53238 add_to_dispatch_window (rtx_insn *insn)
53239 {
53240 int byte_len;
53241 dispatch_windows *window_list;
53242 dispatch_windows *next_list;
53243 dispatch_windows *window0_list;
53244 enum insn_path path;
53245 enum dispatch_group insn_group;
53246 bool insn_fits;
53247 int num_insn;
53248 int num_uops;
53249 int window_num;
53250 int insn_num_uops;
53251 int sum;
53252
53253 if (INSN_CODE (insn) < 0)
53254 return;
53255
53256 byte_len = min_insn_size (insn);
53257 window_list = dispatch_window_list;
53258 next_list = window_list->next;
53259 path = get_insn_path (insn);
53260 insn_group = get_insn_group (insn);
53261
53262 /* Get the last dispatch window. */
53263 if (next_list)
53264 window_list = dispatch_window_list->next;
53265
53266 if (path == path_single)
53267 insn_num_uops = 1;
53268 else if (path == path_double)
53269 insn_num_uops = 2;
53270 else
53271 insn_num_uops = (int) path;
53272
53273   /* If the current window is full, get a new window.
53274      Window number zero is full if MAX_INSN uops are scheduled in it.
53275      Window number one is full if window zero's bytes plus window
53276      one's bytes equal 32, if adding the new instruction's bytes
53277      brings that total to 48 or more, or if it already holds MAX_INSN
53278      instructions.  */
53279 num_insn = window_list->num_insn;
53280 num_uops = window_list->num_uops;
53281 window_num = window_list->window_num;
53282 insn_fits = fits_dispatch_window (insn);
53283
53284 if (num_insn >= MAX_INSN
53285 || num_uops + insn_num_uops > MAX_INSN
53286 || !(insn_fits))
53287 {
53288 window_num = ~window_num & 1;
53289 window_list = allocate_next_window (window_num);
53290 }
53291
53292 if (window_num == 0)
53293 {
53294 add_insn_window (insn, window_list, insn_num_uops);
53295 if (window_list->num_insn >= MAX_INSN
53296 && insn_group == disp_branch)
53297 {
53298 process_end_window ();
53299 return;
53300 }
53301 }
53302 else if (window_num == 1)
53303 {
53304 window0_list = window_list->prev;
53305 sum = window0_list->window_size + window_list->window_size;
53306 if (sum == 32
53307 || (byte_len + sum) >= 48)
53308 {
53309 process_end_window ();
53310 window_list = dispatch_window_list;
53311 }
53312
53313 add_insn_window (insn, window_list, insn_num_uops);
53314 }
53315 else
53316 gcc_unreachable ();
53317
53318 if (is_end_basic_block (insn_group))
53319 {
53320       /* End of basic block is reached; do end-basic-block processing.  */
53321 process_end_window ();
53322 return;
53323 }
53324 }
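
/* Note on the window selection above: "window_num = ~window_num & 1"
   toggles between the two windows (~0 & 1 == 1, ~1 & 1 == 0), so an
   overflowing window 0 continues into window 1 and an overflowing
   window 1 starts a fresh window 0 via allocate_next_window.  */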
53325
53326 /* Print the dispatch window, WINDOW_NUM, to FILE. */
53327
53328 DEBUG_FUNCTION static void
53329 debug_dispatch_window_file (FILE *file, int window_num)
53330 {
53331 dispatch_windows *list;
53332 int i;
53333
53334 if (window_num == 0)
53335 list = dispatch_window_list;
53336 else
53337 list = dispatch_window_list1;
53338
53339 fprintf (file, "Window #%d:\n", list->window_num);
53340 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
53341 list->num_insn, list->num_uops, list->window_size);
53342 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53343 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
53344
53345 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
53346 list->num_stores);
53347 fprintf (file, " insn info:\n");
53348
53349 for (i = 0; i < MAX_INSN; i++)
53350 {
53351 if (!list->window[i].insn)
53352 break;
53353 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
53354 i, group_name[list->window[i].group],
53355 i, (void *)list->window[i].insn,
53356 i, list->window[i].path,
53357 i, list->window[i].byte_len,
53358 i, list->window[i].imm_bytes);
53359 }
53360 }
53361
53362 /* Print to stdout a dispatch window. */
53363
53364 DEBUG_FUNCTION void
53365 debug_dispatch_window (int window_num)
53366 {
53367 debug_dispatch_window_file (stdout, window_num);
53368 }
53369
53370 /* Print INSN dispatch information to FILE. */
53371
53372 DEBUG_FUNCTION static void
53373 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
53374 {
53375 int byte_len;
53376 enum insn_path path;
53377 enum dispatch_group group;
53378 int imm_size;
53379 int num_imm_operand;
53380 int num_imm32_operand;
53381 int num_imm64_operand;
53382
53383 if (INSN_CODE (insn) < 0)
53384 return;
53385
53386 byte_len = min_insn_size (insn);
53387 path = get_insn_path (insn);
53388 group = get_insn_group (insn);
53389 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53390 &num_imm64_operand);
53391
53392 fprintf (file, " insn info:\n");
53393 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
53394 group_name[group], path, byte_len);
53395 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53396 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
53397 }
53398
53399 /* Print to STDERR the status of the ready list with respect to
53400 dispatch windows. */
53401
53402 DEBUG_FUNCTION void
53403 debug_ready_dispatch (void)
53404 {
53405 int i;
53406 int no_ready = number_in_ready ();
53407
53408 fprintf (stdout, "Number of ready: %d\n", no_ready);
53409
53410 for (i = 0; i < no_ready; i++)
53411 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
53412 }
53413
53414 /* This routine is the driver of the dispatch scheduler. */
53415
53416 static void
53417 do_dispatch (rtx_insn *insn, int mode)
53418 {
53419 if (mode == DISPATCH_INIT)
53420 init_dispatch_sched ();
53421 else if (mode == ADD_TO_DISPATCH_WINDOW)
53422 add_to_dispatch_window (insn);
53423 }
53424
53425 /* Return TRUE if Dispatch Scheduling is supported. */
53426
53427 static bool
53428 has_dispatch (rtx_insn *insn, int action)
53429 {
53430 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
53431 || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
53432 switch (action)
53433 {
53434 default:
53435 return false;
53436
53437 case IS_DISPATCH_ON:
53438 return true;
53439 break;
53440
53441 case IS_CMP:
53442 return is_cmp (insn);
53443
53444 case DISPATCH_VIOLATION:
53445 return dispatch_violation ();
53446
53447 case FITS_DISPATCH_WINDOW:
53448 return fits_dispatch_window (insn);
53449 }
53450
53451 return false;
53452 }
53453
53454 /* Implementation of reassociation_width target hook used by
53455 reassoc phase to identify parallelism level in reassociated
53456    tree.  The statement's tree_code is passed in OPC.  The arguments'
53457    mode is passed in MODE.
53458
53459 Currently parallel reassociation is enabled for Atom
53460 processors only and we set reassociation width to be 2
53461 because Atom may issue up to 2 instructions per cycle.
53462
53463 Return value should be fixed if parallel reassociation is
53464 enabled for other processors. */
53465
53466 static int
53467 ix86_reassociation_width (unsigned int, machine_mode mode)
53468 {
53469 /* Vector part. */
53470 if (VECTOR_MODE_P (mode))
53471 {
53472 if (TARGET_VECTOR_PARALLEL_EXECUTION)
53473 return 2;
53474 else
53475 return 1;
53476 }
53477
53478 /* Scalar part. */
53479 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
53480 return 2;
53481 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
53482     return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL) ? 4 : 2);
53483 else
53484 return 1;
53485 }
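
/* For illustration (the input chain is hypothetical): when the reassoc
   pass rebalances a scalar FP addition chain such as a + b + c + d with
   -mfpmath=sse on a 64-bit Haswell target, this hook returns 4, allowing
   the chain to be split into independent partial sums like
   (a + b) + (c + d); other FP targets with TARGET_REASSOC_FP_TO_PARALLEL
   get a width of 2.  */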
53486
53487 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
53488 place emms and femms instructions. */
53489
53490 static machine_mode
53491 ix86_preferred_simd_mode (machine_mode mode)
53492 {
53493 if (!TARGET_SSE)
53494 return word_mode;
53495
53496 switch (mode)
53497 {
53498 case QImode:
53499 return TARGET_AVX512BW ? V64QImode :
53500 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
53501 case HImode:
53502 return TARGET_AVX512BW ? V32HImode :
53503 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
53504 case SImode:
53505 return TARGET_AVX512F ? V16SImode :
53506 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
53507 case DImode:
53508 return TARGET_AVX512F ? V8DImode :
53509 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
53510
53511 case SFmode:
53512 if (TARGET_AVX512F)
53513 return V16SFmode;
53514 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
53515 return V8SFmode;
53516 else
53517 return V4SFmode;
53518
53519 case DFmode:
53520 if (!TARGET_VECTORIZE_DOUBLE)
53521 return word_mode;
53522 else if (TARGET_AVX512F)
53523 return V8DFmode;
53524 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
53525 return V4DFmode;
53526 else if (TARGET_SSE2)
53527 return V2DFmode;
53528 /* FALLTHRU */
53529
53530 default:
53531 return word_mode;
53532 }
53533 }
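
/* Example of the mapping above (target selections are illustrative):
   for SImode this hook returns V16SImode under -mavx512f (sixteen ints
   in a 512-bit vector), V8SImode under -mavx without -mprefer-avx128,
   and V4SImode on SSE-only targets; DFmode degenerates to word_mode
   entirely when TARGET_VECTORIZE_DOUBLE is false.  */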
53534
53535 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
53536 vectors. If AVX512F is enabled then try vectorizing with 512bit,
53537 256bit and 128bit vectors. */
53538
53539 static unsigned int
53540 ix86_autovectorize_vector_sizes (void)
53541 {
53542 return TARGET_AVX512F ? 64 | 32 | 16 :
53543 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
53544 }
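
/* The return value is a bit mask of vector sizes in bytes: with AVX-512F
   it is 64 | 32 | 16 == 0x70, asking the vectorizer to try 512-, 256- and
   128-bit vectors in turn; with AVX (and no -mprefer-avx128) it is
   32 | 16; otherwise 0, meaning only the preferred SIMD mode is tried.  */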
53545
53546 /* Implementation of targetm.vectorize.get_mask_mode.  */
53547
53548 static machine_mode
53549 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
53550 {
53551 unsigned elem_size = vector_size / nunits;
53552
53553 /* Scalar mask case. */
53554 if ((TARGET_AVX512F && vector_size == 64)
53555 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
53556 {
53557 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
53558 return smallest_mode_for_size (nunits, MODE_INT);
53559 }
53560
53561 machine_mode elem_mode
53562 = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
53563
53564 gcc_assert (elem_size * nunits == vector_size);
53565
53566 return mode_for_vector (elem_mode, nunits);
53567 }
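
/* Worked example (inputs are hypothetical): for a 64-byte vector of 16
   SImode elements under AVX-512F, ELEM_SIZE is 4, so the scalar-mask
   branch returns the smallest integer mode with 16 bits (HImode), i.e.
   one mask bit per element.  Without AVX-512 masking the same request
   falls through to a vector mask of the same shape, V16SImode.  */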
53568
53569 \f
53570
53571 /* Return class of registers which could be used for pseudo of MODE
53572 and of class RCLASS for spilling instead of memory. Return NO_REGS
53573    if it is not possible or not profitable.  */
53574 static reg_class_t
53575 ix86_spill_class (reg_class_t rclass, machine_mode mode)
53576 {
53577 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
53578 && (mode == SImode || (TARGET_64BIT && mode == DImode))
53579 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
53580 return ALL_SSE_REGS;
53581 return NO_REGS;
53582 }
53583
53584 /* Implement targetm.vectorize.init_cost. */
53585
53586 static void *
53587 ix86_init_cost (struct loop *)
53588 {
53589 unsigned *cost = XNEWVEC (unsigned, 3);
53590 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
53591 return cost;
53592 }
53593
53594 /* Implement targetm.vectorize.add_stmt_cost. */
53595
53596 static unsigned
53597 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
53598 struct _stmt_vec_info *stmt_info, int misalign,
53599 enum vect_cost_model_location where)
53600 {
53601 unsigned *cost = (unsigned *) data;
53602 unsigned retval = 0;
53603
53604 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
53605 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
53606
53607 /* Statements in an inner loop relative to the loop being
53608 vectorized are weighted more heavily. The value here is
53609 arbitrary and could potentially be improved with analysis. */
53610 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
53611 count *= 50; /* FIXME. */
53612
53613 retval = (unsigned) (count * stmt_cost);
53614
53615   /* Multiply all vector stmt costs by 1.7 (estimated cost) for
53616      Silvermont, as it has an out-of-order integer pipeline and can issue
53617      2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
53618 if (TARGET_SILVERMONT || TARGET_INTEL)
53619 if (stmt_info && stmt_info->stmt)
53620 {
53621 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
53622 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
53623 retval = (retval * 17) / 10;
53624 }
53625
53626 cost[where] += retval;
53627
53628 return retval;
53629 }
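
/* Cost arithmetic example (the per-statement cost is hypothetical):
   a vector statement with stmt_cost 1 inside an inner loop has its
   count scaled by 50, giving retval == 50; on Silvermont with an
   integer-typed lhs this is further scaled to 50 * 17 / 10 == 85
   before being accumulated into cost[vect_body].  */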
53630
53631 /* Implement targetm.vectorize.finish_cost. */
53632
53633 static void
53634 ix86_finish_cost (void *data, unsigned *prologue_cost,
53635 unsigned *body_cost, unsigned *epilogue_cost)
53636 {
53637 unsigned *cost = (unsigned *) data;
53638 *prologue_cost = cost[vect_prologue];
53639 *body_cost = cost[vect_body];
53640 *epilogue_cost = cost[vect_epilogue];
53641 }
53642
53643 /* Implement targetm.vectorize.destroy_cost_data. */
53644
53645 static void
53646 ix86_destroy_cost_data (void *data)
53647 {
53648 free (data);
53649 }
53650
53651 /* Validate target specific memory model bits in VAL. */
53652
53653 static unsigned HOST_WIDE_INT
53654 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
53655 {
53656 enum memmodel model = memmodel_from_int (val);
53657 bool strong;
53658
53659 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
53660 |MEMMODEL_MASK)
53661 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
53662 {
53663 warning (OPT_Winvalid_memory_model,
53664 "Unknown architecture specific memory model");
53665 return MEMMODEL_SEQ_CST;
53666 }
53667 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
53668 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
53669 {
53670 warning (OPT_Winvalid_memory_model,
53671 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
53672 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
53673 }
53674 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
53675 {
53676 warning (OPT_Winvalid_memory_model,
53677 "HLE_RELEASE not used with RELEASE or stronger memory model");
53678 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
53679 }
53680 return val;
53681 }
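
/* For illustration, the HLE bits validated above are combined with a C11
   memory model by user code, e.g. (sketch, assuming a TSX-capable target):

     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Pairing __ATOMIC_HLE_RELEASE with a weaker model such as
   __ATOMIC_ACQUIRE is what triggers the warning above and the fallback
   to a SEQ_CST model.  */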
53682
53683 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
53684 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
53685 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
53686    or the number of vecsize_mangle variants that should be emitted.  */
53687
53688 static int
53689 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
53690 struct cgraph_simd_clone *clonei,
53691 tree base_type, int num)
53692 {
53693 int ret = 1;
53694
53695 if (clonei->simdlen
53696 && (clonei->simdlen < 2
53697 || clonei->simdlen > 16
53698 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
53699 {
53700 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53701 "unsupported simdlen %d", clonei->simdlen);
53702 return 0;
53703 }
53704
53705 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
53706 if (TREE_CODE (ret_type) != VOID_TYPE)
53707 switch (TYPE_MODE (ret_type))
53708 {
53709 case QImode:
53710 case HImode:
53711 case SImode:
53712 case DImode:
53713 case SFmode:
53714 case DFmode:
53715 /* case SCmode: */
53716 /* case DCmode: */
53717 break;
53718 default:
53719 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53720 "unsupported return type %qT for simd\n", ret_type);
53721 return 0;
53722 }
53723
53724 tree t;
53725 int i;
53726
53727 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
53728 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
53729 switch (TYPE_MODE (TREE_TYPE (t)))
53730 {
53731 case QImode:
53732 case HImode:
53733 case SImode:
53734 case DImode:
53735 case SFmode:
53736 case DFmode:
53737 /* case SCmode: */
53738 /* case DCmode: */
53739 break;
53740 default:
53741 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53742 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
53743 return 0;
53744 }
53745
53746 if (clonei->cilk_elemental)
53747 {
53748       /* Parse the processor clause here.  If not present, default to 'b'.  */
53749 clonei->vecsize_mangle = 'b';
53750 }
53751 else if (!TREE_PUBLIC (node->decl))
53752 {
53753 /* If the function isn't exported, we can pick up just one ISA
53754 for the clones. */
53755 if (TARGET_AVX2)
53756 clonei->vecsize_mangle = 'd';
53757 else if (TARGET_AVX)
53758 clonei->vecsize_mangle = 'c';
53759 else
53760 clonei->vecsize_mangle = 'b';
53761 ret = 1;
53762 }
53763 else
53764 {
53765 clonei->vecsize_mangle = "bcd"[num];
53766 ret = 3;
53767 }
53768 switch (clonei->vecsize_mangle)
53769 {
53770 case 'b':
53771 clonei->vecsize_int = 128;
53772 clonei->vecsize_float = 128;
53773 break;
53774 case 'c':
53775 clonei->vecsize_int = 128;
53776 clonei->vecsize_float = 256;
53777 break;
53778 case 'd':
53779 clonei->vecsize_int = 256;
53780 clonei->vecsize_float = 256;
53781 break;
53782 }
53783 if (clonei->simdlen == 0)
53784 {
53785 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
53786 clonei->simdlen = clonei->vecsize_int;
53787 else
53788 clonei->simdlen = clonei->vecsize_float;
53789 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
53790 if (clonei->simdlen > 16)
53791 clonei->simdlen = 16;
53792 }
53793 return ret;
53794 }
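
/* Worked example (the declaration is hypothetical): for an exported
   "#pragma omp declare simd" function whose base type is double and
   whose simdlen clause is omitted, three clones are requested
   (mangles 'b', 'c' and 'd').  The 'b' clone gets vecsize_float 128
   and therefore simdlen 128 / 64 == 2, while the 'c' and 'd' clones
   get vecsize_float 256 and simdlen 256 / 64 == 4.  */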
53795
53796 /* Add target attribute to SIMD clone NODE if needed. */
53797
53798 static void
53799 ix86_simd_clone_adjust (struct cgraph_node *node)
53800 {
53801 const char *str = NULL;
53802 gcc_assert (node->decl == cfun->decl);
53803 switch (node->simdclone->vecsize_mangle)
53804 {
53805 case 'b':
53806 if (!TARGET_SSE2)
53807 str = "sse2";
53808 break;
53809 case 'c':
53810 if (!TARGET_AVX)
53811 str = "avx";
53812 break;
53813 case 'd':
53814 if (!TARGET_AVX2)
53815 str = "avx2";
53816 break;
53817 default:
53818 gcc_unreachable ();
53819 }
53820 if (str == NULL)
53821 return;
53822 push_cfun (NULL);
53823 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
53824 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
53825 gcc_assert (ok);
53826 pop_cfun ();
53827 ix86_reset_previous_fndecl ();
53828 ix86_set_current_function (node->decl);
53829 }
53830
53831 /* If SIMD clone NODE can't be used in a vectorized loop
53832    in the current function, return -1; otherwise return the badness of
53833    using it (0 if it is most desirable from the vecsize_mangle point of
53834    view, 1 slightly less desirable, etc.).  */
53835
53836 static int
53837 ix86_simd_clone_usable (struct cgraph_node *node)
53838 {
53839 switch (node->simdclone->vecsize_mangle)
53840 {
53841 case 'b':
53842 if (!TARGET_SSE2)
53843 return -1;
53844 if (!TARGET_AVX)
53845 return 0;
53846 return TARGET_AVX2 ? 2 : 1;
53847 case 'c':
53848 if (!TARGET_AVX)
53849 return -1;
53850 return TARGET_AVX2 ? 1 : 0;
53851 break;
53852 case 'd':
53853 if (!TARGET_AVX2)
53854 return -1;
53855 return 0;
53856 default:
53857 gcc_unreachable ();
53858 }
53859 }
53860
53861 /* This function adjusts the unroll factor based on
53862    the hardware capabilities.  For example, bdver3 has
53863    a loop buffer which makes unrolling of smaller
53864    loops less important.  This function decides the
53865    unroll factor using the number of memory references
53866    (the value 32 is used) as a heuristic.  */
53867
53868 static unsigned
53869 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
53870 {
53871 basic_block *bbs;
53872 rtx_insn *insn;
53873 unsigned i;
53874 unsigned mem_count = 0;
53875
53876 if (!TARGET_ADJUST_UNROLL)
53877 return nunroll;
53878
53879 /* Count the number of memory references within the loop body.
53880 This value determines the unrolling factor for bdver3 and bdver4
53881 architectures. */
53882 subrtx_iterator::array_type array;
53883 bbs = get_loop_body (loop);
53884 for (i = 0; i < loop->num_nodes; i++)
53885 FOR_BB_INSNS (bbs[i], insn)
53886 if (NONDEBUG_INSN_P (insn))
53887 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
53888 if (const_rtx x = *iter)
53889 if (MEM_P (x))
53890 {
53891 machine_mode mode = GET_MODE (x);
53892 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
53893 if (n_words > 4)
53894 mem_count += 2;
53895 else
53896 mem_count += 1;
53897 }
53898 free (bbs);
53899
53900   if (mem_count && mem_count <= 32)
53901     return 32 / mem_count;
53902
53903 return nunroll;
53904 }
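
/* Example of the heuristic (reference counts are hypothetical): a
   bdver3/bdver4-tuned loop containing 8 counted memory references gets
   an unroll factor of 32 / 8 == 4 regardless of the NUNROLL suggested
   by the generic unroller, while a loop with more than 32 references
   (or none at all) keeps NUNROLL unchanged.  */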
53905
53906
53907 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
53908
53909 static bool
53910 ix86_float_exceptions_rounding_supported_p (void)
53911 {
53912 /* For x87 floating point with standard excess precision handling,
53913 there is no adddf3 pattern (since x87 floating point only has
53914 XFmode operations) so the default hook implementation gets this
53915 wrong. */
53916 return TARGET_80387 || TARGET_SSE_MATH;
53917 }
53918
53919 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
53920
53921 static void
53922 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
53923 {
53924 if (!TARGET_80387 && !TARGET_SSE_MATH)
53925 return;
53926 tree exceptions_var = create_tmp_var_raw (integer_type_node);
53927 if (TARGET_80387)
53928 {
53929 tree fenv_index_type = build_index_type (size_int (6));
53930 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
53931 tree fenv_var = create_tmp_var_raw (fenv_type);
53932 TREE_ADDRESSABLE (fenv_var) = 1;
53933 tree fenv_ptr = build_pointer_type (fenv_type);
53934 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
53935 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
53936 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
53937 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
53938 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
53939 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
53940 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
53941 tree hold_fnclex = build_call_expr (fnclex, 0);
53942 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
53943 NULL_TREE, NULL_TREE);
53944 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
53945 hold_fnclex);
53946 *clear = build_call_expr (fnclex, 0);
53947 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
53948 tree fnstsw_call = build_call_expr (fnstsw, 0);
53949 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
53950 sw_var, fnstsw_call);
53951 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
53952 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
53953 exceptions_var, exceptions_x87);
53954 *update = build2 (COMPOUND_EXPR, integer_type_node,
53955 sw_mod, update_mod);
53956 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
53957 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
53958 }
53959 if (TARGET_SSE_MATH)
53960 {
53961 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
53962 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
53963 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
53964 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
53965 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
53966 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
53967 mxcsr_orig_var, stmxcsr_hold_call);
53968 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
53969 mxcsr_orig_var,
53970 build_int_cst (unsigned_type_node, 0x1f80));
53971 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
53972 build_int_cst (unsigned_type_node, 0xffffffc0));
53973 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
53974 mxcsr_mod_var, hold_mod_val);
53975 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53976 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
53977 hold_assign_orig, hold_assign_mod);
53978 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
53979 ldmxcsr_hold_call);
53980 if (*hold)
53981 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
53982 else
53983 *hold = hold_all;
53984 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53985 if (*clear)
53986 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
53987 ldmxcsr_clear_call);
53988 else
53989 *clear = ldmxcsr_clear_call;
53990   tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
53991   tree exceptions_sse = fold_convert (integer_type_node,
53992 				      stmxcsr_update_call);
53993 if (*update)
53994 {
53995 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
53996 exceptions_var, exceptions_sse);
53997 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
53998 exceptions_var, exceptions_mod);
53999 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
54000 exceptions_assign);
54001 }
54002 else
54003 *update = build2 (MODIFY_EXPR, integer_type_node,
54004 exceptions_var, exceptions_sse);
54005 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
54006 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54007 ldmxcsr_update_call);
54008 }
54009 tree atomic_feraiseexcept
54010 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
54011 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
54012 1, exceptions_var);
54013 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54014 atomic_feraiseexcept_call);
54015 }
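
/* The SSE half of the sequence built above corresponds roughly to the
   following sketch (using the MXCSR builtins; variable names are
   illustrative):

     unsigned orig = __builtin_ia32_stmxcsr ();        (save MXCSR)
     unsigned mod = (orig | 0x1f80) & 0xffffffc0;      (mask exceptions, clear flags)
     __builtin_ia32_ldmxcsr (mod);                     (*hold)
     ... the atomic operation ...
     __builtin_ia32_ldmxcsr (mod);                     (*clear)
     int exc = __builtin_ia32_stmxcsr ();              (*update: collect flags)
     __builtin_ia32_ldmxcsr (orig);                    (restore, then raise exc)  */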
54016
54017 /* Return mode to be used for bounds or VOIDmode
54018 if bounds are not supported. */
54019
54020 static enum machine_mode
54021 ix86_mpx_bound_mode ()
54022 {
54023 /* Do not support pointer checker if MPX
54024 is not enabled. */
54025 if (!TARGET_MPX)
54026 {
54027 if (flag_check_pointer_bounds)
54028 warning (0, "Pointer Checker requires MPX support on this target."
54029 		 " Use the -mmpx option to enable MPX.");
54030 return VOIDmode;
54031 }
54032
54033 return BNDmode;
54034 }
54035
54036 /* Return constant used to statically initialize constant bounds.
54037
54038 This function is used to create special bound values. For now
54039 only INIT bounds and NONE bounds are expected. More special
54040 values may be added later. */
54041
54042 static tree
54043 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
54044 {
54045 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
54046 : build_zero_cst (pointer_sized_int_node);
54047 tree high = ub ? build_zero_cst (pointer_sized_int_node)
54048 : build_minus_one_cst (pointer_sized_int_node);
54049
54050 /* This function is supposed to be used to create INIT and
54051 NONE bounds only. */
54052 gcc_assert ((lb == 0 && ub == -1)
54053 || (lb == -1 && ub == 0));
54054
54055 return build_complex (NULL, low, high);
54056 }
54057
54058 /* Generate a list of statements STMTS to initialize pointer bounds
54059 variable VAR with bounds LB and UB. Return the number of generated
54060 statements. */
54061
54062 static int
54063 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
54064 {
54065 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
54066 tree lhs, modify, var_p;
54067
54068 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
54069 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
54070
54071 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
54072 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
54073 append_to_statement_list (modify, stmts);
54074
54075 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
54076 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
54077 TYPE_SIZE_UNIT (pointer_sized_int_node)));
54078 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
54079 append_to_statement_list (modify, stmts);
54080
54081 return 2;
54082 }
54083
54084 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
54085 /* For i386, a common symbol is local only for non-PIE binaries.  For
54086    x86-64, a common symbol is local only for non-PIE binaries or when
54087    the linker supports copy relocations in PIE binaries.  */
54088
54089 static bool
54090 ix86_binds_local_p (const_tree exp)
54091 {
54092 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
54093 (!flag_pic
54094 || (TARGET_64BIT
54095 && HAVE_LD_PIE_COPYRELOC != 0)));
54096 }
54097 #endif
54098
54099 /* If MEM is in the form of [base+offset], extract the two parts
54100 of address and set to BASE and OFFSET, otherwise return false. */
54101
54102 static bool
54103 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
54104 {
54105 rtx addr;
54106
54107 gcc_assert (MEM_P (mem));
54108
54109 addr = XEXP (mem, 0);
54110
54111 if (GET_CODE (addr) == CONST)
54112 addr = XEXP (addr, 0);
54113
54114 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
54115 {
54116 *base = addr;
54117 *offset = const0_rtx;
54118 return true;
54119 }
54120
54121 if (GET_CODE (addr) == PLUS
54122 && (REG_P (XEXP (addr, 0))
54123 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
54124 && CONST_INT_P (XEXP (addr, 1)))
54125 {
54126 *base = XEXP (addr, 0);
54127 *offset = XEXP (addr, 1);
54128 return true;
54129 }
54130
54131 return false;
54132 }
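
/* Accepted address shapes, for reference: a bare (reg) or (symbol_ref)
   yields that rtx as BASE with OFFSET 0, while (plus (reg) (const_int 8))
   or (const (plus (symbol_ref) (const_int 8))) yields BASE = the register
   or symbol and OFFSET = 8.  Addresses with an index register or scale
   are rejected.  */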
54133
54134 /* Given OPERANDS of consecutive load/store, check if we can merge
54135 them into move multiple. LOAD is true if they are load instructions.
54136 MODE is the mode of memory operands. */
54137
54138 bool
54139 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
54140 enum machine_mode mode)
54141 {
54142 HOST_WIDE_INT offval_1, offval_2, msize;
54143 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
54144
54145 if (load)
54146 {
54147 mem_1 = operands[1];
54148 mem_2 = operands[3];
54149 reg_1 = operands[0];
54150 reg_2 = operands[2];
54151 }
54152 else
54153 {
54154 mem_1 = operands[0];
54155 mem_2 = operands[2];
54156 reg_1 = operands[1];
54157 reg_2 = operands[3];
54158 }
54159
54160 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
54161
54162 if (REGNO (reg_1) != REGNO (reg_2))
54163 return false;
54164
54165 /* Check if the addresses are in the form of [base+offset]. */
54166 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
54167 return false;
54168 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
54169 return false;
54170
54171 /* Check if the bases are the same. */
54172 if (!rtx_equal_p (base_1, base_2))
54173 return false;
54174
54175 offval_1 = INTVAL (offset_1);
54176 offval_2 = INTVAL (offset_2);
54177 msize = GET_MODE_SIZE (mode);
54178 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
54179 if (offval_1 + msize != offval_2)
54180 return false;
54181
54182 return true;
54183 }
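
/* Example of a pair that passes the checks above (registers are
   illustrative): two DImode loads from [rsi + 0] and [rsi + 8] into the
   same destination register satisfy the same-base, same-regno and
   adjacency (offset_1 + 8 == offset_2) tests; the same loads in the
   opposite order, or loads from different base registers, do not.  */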
54184
54185 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
54186
54187 static bool
54188 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
54189 optimization_type opt_type)
54190 {
54191 switch (op)
54192 {
54193 case asin_optab:
54194 case acos_optab:
54195 case log1p_optab:
54196 case exp_optab:
54197 case exp10_optab:
54198 case exp2_optab:
54199 case expm1_optab:
54200 case ldexp_optab:
54201 case scalb_optab:
54202 case round_optab:
54203 return opt_type == OPTIMIZE_FOR_SPEED;
54204
54205 case rint_optab:
54206 if (SSE_FLOAT_MODE_P (mode1)
54207 && TARGET_SSE_MATH
54208 && !flag_trapping_math
54209 && !TARGET_ROUND)
54210 return opt_type == OPTIMIZE_FOR_SPEED;
54211 return true;
54212
54213 case floor_optab:
54214 case ceil_optab:
54215 case btrunc_optab:
54216 if (SSE_FLOAT_MODE_P (mode1)
54217 && TARGET_SSE_MATH
54218 && !flag_trapping_math
54219 && TARGET_ROUND)
54220 return true;
54221 return opt_type == OPTIMIZE_FOR_SPEED;
54222
54223 case rsqrt_optab:
54224 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
54225
54226 default:
54227 return true;
54228 }
54229 }
54230
54231 /* Address space support.
54232
54233 This is not "far pointers" in the 16-bit sense, but an easy way
54234 to use %fs and %gs segment prefixes. Therefore:
54235
54236 (a) All address spaces have the same modes,
54237    (b) All address spaces have the same address forms,
54238 (c) While %fs and %gs are technically subsets of the generic
54239 address space, they are probably not subsets of each other.
54240 (d) Since we have no access to the segment base register values
54241 without resorting to a system call, we cannot convert a
54242 non-default address space to a default address space.
54243 Therefore we do not claim %fs or %gs are subsets of generic.
54244 (e) However, __seg_tls uses UNSPEC_TP as the base (which itself is
54245 stored at __seg_tls:0) so we can map between tls and generic. */
54246
54247 static bool
54248 ix86_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
54249 {
54250 return (subset == superset
54251 || (superset == ADDR_SPACE_GENERIC
54252 && subset == ADDR_SPACE_SEG_TLS));
54253 }
54254 #undef TARGET_ADDR_SPACE_SUBSET_P
54255 #define TARGET_ADDR_SPACE_SUBSET_P ix86_addr_space_subset_p
54256
54257 static rtx
54258 ix86_addr_space_convert (rtx op, tree from_type, tree to_type)
54259 {
54260 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
54261 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
54262
54263 /* Conversion between SEG_TLS and GENERIC is handled by adding or
54264 subtracting the thread pointer. */
54265 if ((from_as == ADDR_SPACE_GENERIC && to_as == ADDR_SPACE_SEG_TLS)
54266 || (from_as == ADDR_SPACE_SEG_TLS && to_as == ADDR_SPACE_GENERIC))
54267 {
54268 machine_mode mode = GET_MODE (op);
54269 if (mode == VOIDmode)
54270 mode = ptr_mode;
54271 rtx tp = get_thread_pointer (mode, optimize || mode != ptr_mode);
54272 return expand_binop (mode, (to_as == ADDR_SPACE_GENERIC
54273 ? add_optab : sub_optab),
54274 op, tp, NULL, 1, OPTAB_WIDEN);
54275 }
54276
54277 return op;
54278 }
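
/* Usage sketch for the conversion above (illustrative, assuming the
   __seg_tls named address space extension):

     extern int __seg_tls *p;
     int *q = (int *) p;     casting to generic adds the thread pointer

   Converting a generic pointer back to int __seg_tls * emits the
   opposite operation (sub_optab), subtracting the thread pointer.  */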
54279 #undef TARGET_ADDR_SPACE_CONVERT
54280 #define TARGET_ADDR_SPACE_CONVERT ix86_addr_space_convert
54281
54282 static int
54283 ix86_addr_space_debug (addr_space_t as)
54284 {
54285 /* Fold __seg_tls to __seg_fs or __seg_gs for debugging. */
54286 if (as == ADDR_SPACE_SEG_TLS)
54287 as = DEFAULT_TLS_SEG_REG;
54288 return as;
54289 }
54290 #undef TARGET_ADDR_SPACE_DEBUG
54291 #define TARGET_ADDR_SPACE_DEBUG ix86_addr_space_debug
54292
54293 /* All use of segmentation is assumed to make address 0 valid. */
54294
54295 static bool
54296 ix86_addr_space_zero_address_valid (addr_space_t as)
54297 {
54298 return as != ADDR_SPACE_GENERIC;
54299 }
54300 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
54301 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
54302
54303 /* Initialize the GCC target structure. */
54304 #undef TARGET_RETURN_IN_MEMORY
54305 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
54306
54307 #undef TARGET_LEGITIMIZE_ADDRESS
54308 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
54309
54310 #undef TARGET_ATTRIBUTE_TABLE
54311 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
54312 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
54313 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
54314 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54315 # undef TARGET_MERGE_DECL_ATTRIBUTES
54316 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
54317 #endif
54318
54319 #undef TARGET_COMP_TYPE_ATTRIBUTES
54320 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
54321
54322 #undef TARGET_INIT_BUILTINS
54323 #define TARGET_INIT_BUILTINS ix86_init_builtins
54324 #undef TARGET_BUILTIN_DECL
54325 #define TARGET_BUILTIN_DECL ix86_builtin_decl
54326 #undef TARGET_EXPAND_BUILTIN
54327 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
54328
54329 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
54330 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
54331 ix86_builtin_vectorized_function
54332
54333 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
54334 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
54335
54336 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
54337 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
54338
54339 #undef TARGET_VECTORIZE_BUILTIN_GATHER
54340 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
54341
54342 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
54343 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
54344
54345 #undef TARGET_BUILTIN_RECIPROCAL
54346 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
54347
54348 #undef TARGET_ASM_FUNCTION_EPILOGUE
54349 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
54350
54351 #undef TARGET_ENCODE_SECTION_INFO
54352 #ifndef SUBTARGET_ENCODE_SECTION_INFO
54353 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
54354 #else
54355 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
54356 #endif
54357
54358 #undef TARGET_ASM_OPEN_PAREN
54359 #define TARGET_ASM_OPEN_PAREN ""
54360 #undef TARGET_ASM_CLOSE_PAREN
54361 #define TARGET_ASM_CLOSE_PAREN ""
54362
54363 #undef TARGET_ASM_BYTE_OP
54364 #define TARGET_ASM_BYTE_OP ASM_BYTE
54365
54366 #undef TARGET_ASM_ALIGNED_HI_OP
54367 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
54368 #undef TARGET_ASM_ALIGNED_SI_OP
54369 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
54370 #ifdef ASM_QUAD
54371 #undef TARGET_ASM_ALIGNED_DI_OP
54372 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
54373 #endif
54374
54375 #undef TARGET_PROFILE_BEFORE_PROLOGUE
54376 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
54377
54378 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
54379 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
54380
54381 #undef TARGET_ASM_UNALIGNED_HI_OP
54382 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
54383 #undef TARGET_ASM_UNALIGNED_SI_OP
54384 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
54385 #undef TARGET_ASM_UNALIGNED_DI_OP
54386 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
54387
54388 #undef TARGET_PRINT_OPERAND
54389 #define TARGET_PRINT_OPERAND ix86_print_operand
54390 #undef TARGET_PRINT_OPERAND_ADDRESS
54391 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
54392 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
54393 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
54394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
54395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
54396
54397 #undef TARGET_SCHED_INIT_GLOBAL
54398 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
54399 #undef TARGET_SCHED_ADJUST_COST
54400 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
54401 #undef TARGET_SCHED_ISSUE_RATE
54402 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
54403 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
54404 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
54405 ia32_multipass_dfa_lookahead
54406 #undef TARGET_SCHED_MACRO_FUSION_P
54407 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
54408 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
54409 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
54410
54411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
54412 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
54413
54414 #undef TARGET_MEMMODEL_CHECK
54415 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
54416
54417 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
54418 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
54419
54420 #ifdef HAVE_AS_TLS
54421 #undef TARGET_HAVE_TLS
54422 #define TARGET_HAVE_TLS true
54423 #endif
54424 #undef TARGET_CANNOT_FORCE_CONST_MEM
54425 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
54426 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
54427 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
54428
54429 #undef TARGET_DELEGITIMIZE_ADDRESS
54430 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
54431
54432 #undef TARGET_MS_BITFIELD_LAYOUT_P
54433 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
54434
54435 #if TARGET_MACHO
54436 #undef TARGET_BINDS_LOCAL_P
54437 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
54438 #else
54439 #undef TARGET_BINDS_LOCAL_P
54440 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
54441 #endif
54442 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54443 #undef TARGET_BINDS_LOCAL_P
54444 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
54445 #endif
54446
54447 #undef TARGET_ASM_OUTPUT_MI_THUNK
54448 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
54449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
54450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
54451
54452 #undef TARGET_ASM_FILE_START
54453 #define TARGET_ASM_FILE_START x86_file_start
54454
54455 #undef TARGET_OPTION_OVERRIDE
54456 #define TARGET_OPTION_OVERRIDE ix86_option_override
54457
54458 #undef TARGET_REGISTER_MOVE_COST
54459 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
54460 #undef TARGET_MEMORY_MOVE_COST
54461 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
54462 #undef TARGET_RTX_COSTS
54463 #define TARGET_RTX_COSTS ix86_rtx_costs
54464 #undef TARGET_ADDRESS_COST
54465 #define TARGET_ADDRESS_COST ix86_address_cost
54466
54467 #undef TARGET_FIXED_CONDITION_CODE_REGS
54468 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
54469 #undef TARGET_CC_MODES_COMPATIBLE
54470 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
54471
54472 #undef TARGET_MACHINE_DEPENDENT_REORG
54473 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
54474
54475 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
54476 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
54477
54478 #undef TARGET_BUILD_BUILTIN_VA_LIST
54479 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
54480
54481 #undef TARGET_FOLD_BUILTIN
54482 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
54483
54484 #undef TARGET_COMPARE_VERSION_PRIORITY
54485 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
54486
54487 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
54488 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
54489 ix86_generate_version_dispatcher_body
54490
54491 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
54492 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
54493 ix86_get_function_versions_dispatcher
54494
54495 #undef TARGET_ENUM_VA_LIST_P
54496 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
54497
54498 #undef TARGET_FN_ABI_VA_LIST
54499 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
54500
54501 #undef TARGET_CANONICAL_VA_LIST_TYPE
54502 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
54503
54504 #undef TARGET_EXPAND_BUILTIN_VA_START
54505 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
54506
54507 #undef TARGET_MD_ASM_ADJUST
54508 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
54509
54510 #undef TARGET_PROMOTE_PROTOTYPES
54511 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
54512 #undef TARGET_SETUP_INCOMING_VARARGS
54513 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
54514 #undef TARGET_MUST_PASS_IN_STACK
54515 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
54516 #undef TARGET_FUNCTION_ARG_ADVANCE
54517 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
54518 #undef TARGET_FUNCTION_ARG
54519 #define TARGET_FUNCTION_ARG ix86_function_arg
54520 #undef TARGET_INIT_PIC_REG
54521 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
54522 #undef TARGET_USE_PSEUDO_PIC_REG
54523 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
54524 #undef TARGET_FUNCTION_ARG_BOUNDARY
54525 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
54526 #undef TARGET_PASS_BY_REFERENCE
54527 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
54528 #undef TARGET_INTERNAL_ARG_POINTER
54529 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
54530 #undef TARGET_UPDATE_STACK_BOUNDARY
54531 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
54532 #undef TARGET_GET_DRAP_RTX
54533 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
54534 #undef TARGET_STRICT_ARGUMENT_NAMING
54535 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
54536 #undef TARGET_STATIC_CHAIN
54537 #define TARGET_STATIC_CHAIN ix86_static_chain
54538 #undef TARGET_TRAMPOLINE_INIT
54539 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
54540 #undef TARGET_RETURN_POPS_ARGS
54541 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
54542
54543 #undef TARGET_LEGITIMATE_COMBINED_INSN
54544 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
54545
54546 #undef TARGET_ASAN_SHADOW_OFFSET
54547 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
54548
54549 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
54550 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
54551
54552 #undef TARGET_SCALAR_MODE_SUPPORTED_P
54553 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
54554
54555 #undef TARGET_VECTOR_MODE_SUPPORTED_P
54556 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
54557
54558 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
54559 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
54560 ix86_libgcc_floating_mode_supported_p
54561
54562 #undef TARGET_C_MODE_FOR_SUFFIX
54563 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
54564
54565 #ifdef HAVE_AS_TLS
54566 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
54567 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
54568 #endif
54569
54570 #ifdef SUBTARGET_INSERT_ATTRIBUTES
54571 #undef TARGET_INSERT_ATTRIBUTES
54572 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
54573 #endif
54574
54575 #undef TARGET_MANGLE_TYPE
54576 #define TARGET_MANGLE_TYPE ix86_mangle_type
54577
54578 #if !TARGET_MACHO
54579 #undef TARGET_STACK_PROTECT_FAIL
54580 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
54581 #endif
54582
54583 #undef TARGET_FUNCTION_VALUE
54584 #define TARGET_FUNCTION_VALUE ix86_function_value
54585
54586 #undef TARGET_FUNCTION_VALUE_REGNO_P
54587 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
54588
54589 #undef TARGET_PROMOTE_FUNCTION_MODE
54590 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
54591
54592 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
54593 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
54594
54595 #undef TARGET_MEMBER_TYPE_FORCES_BLK
54596 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
54597
54598 #undef TARGET_INSTANTIATE_DECLS
54599 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
54600
54601 #undef TARGET_SECONDARY_RELOAD
54602 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
54603
54604 #undef TARGET_CLASS_MAX_NREGS
54605 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
54606
54607 #undef TARGET_PREFERRED_RELOAD_CLASS
54608 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
54609 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
54610 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
54611 #undef TARGET_CLASS_LIKELY_SPILLED_P
54612 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
54613
54614 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
54615 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
54616 ix86_builtin_vectorization_cost
54617 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
54618 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
54619 ix86_vectorize_vec_perm_const_ok
54620 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
54621 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
54622 ix86_preferred_simd_mode
54623 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
54624 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
54625 ix86_autovectorize_vector_sizes
54626 #undef TARGET_VECTORIZE_GET_MASK_MODE
54627 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
54628 #undef TARGET_VECTORIZE_INIT_COST
54629 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
54630 #undef TARGET_VECTORIZE_ADD_STMT_COST
54631 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
54632 #undef TARGET_VECTORIZE_FINISH_COST
54633 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
54634 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
54635 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
54636
54637 #undef TARGET_SET_CURRENT_FUNCTION
54638 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
54639
54640 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
54641 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
54642
54643 #undef TARGET_OPTION_SAVE
54644 #define TARGET_OPTION_SAVE ix86_function_specific_save
54645
54646 #undef TARGET_OPTION_RESTORE
54647 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
54648
54649 #undef TARGET_OPTION_POST_STREAM_IN
54650 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
54651
54652 #undef TARGET_OPTION_PRINT
54653 #define TARGET_OPTION_PRINT ix86_function_specific_print
54654
54655 #undef TARGET_OPTION_FUNCTION_VERSIONS
54656 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
54657
54658 #undef TARGET_CAN_INLINE_P
54659 #define TARGET_CAN_INLINE_P ix86_can_inline_p
54660
54661 #undef TARGET_LEGITIMATE_ADDRESS_P
54662 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
54663
54664 #undef TARGET_LRA_P
54665 #define TARGET_LRA_P hook_bool_void_true
54666
54667 #undef TARGET_REGISTER_PRIORITY
54668 #define TARGET_REGISTER_PRIORITY ix86_register_priority
54669
54670 #undef TARGET_REGISTER_USAGE_LEVELING_P
54671 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
54672
54673 #undef TARGET_LEGITIMATE_CONSTANT_P
54674 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
54675
54676 #undef TARGET_FRAME_POINTER_REQUIRED
54677 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
54678
54679 #undef TARGET_CAN_ELIMINATE
54680 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
54681
54682 #undef TARGET_EXTRA_LIVE_ON_ENTRY
54683 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
54684
54685 #undef TARGET_ASM_CODE_END
54686 #define TARGET_ASM_CODE_END ix86_code_end
54687
54688 #undef TARGET_CONDITIONAL_REGISTER_USAGE
54689 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
54690
54691 #if TARGET_MACHO
54692 #undef TARGET_INIT_LIBFUNCS
54693 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
54694 #endif
54695
54696 #undef TARGET_LOOP_UNROLL_ADJUST
54697 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
54698
54699 #undef TARGET_SPILL_CLASS
54700 #define TARGET_SPILL_CLASS ix86_spill_class
54701
54702 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
54703 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
54704 ix86_simd_clone_compute_vecsize_and_simdlen
54705
54706 #undef TARGET_SIMD_CLONE_ADJUST
54707 #define TARGET_SIMD_CLONE_ADJUST \
54708 ix86_simd_clone_adjust
54709
54710 #undef TARGET_SIMD_CLONE_USABLE
54711 #define TARGET_SIMD_CLONE_USABLE \
54712 ix86_simd_clone_usable
54713
54714 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
54715 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
54716 ix86_float_exceptions_rounding_supported_p
54717
54718 #undef TARGET_MODE_EMIT
54719 #define TARGET_MODE_EMIT ix86_emit_mode_set
54720
54721 #undef TARGET_MODE_NEEDED
54722 #define TARGET_MODE_NEEDED ix86_mode_needed
54723
54724 #undef TARGET_MODE_AFTER
54725 #define TARGET_MODE_AFTER ix86_mode_after
54726
54727 #undef TARGET_MODE_ENTRY
54728 #define TARGET_MODE_ENTRY ix86_mode_entry
54729
54730 #undef TARGET_MODE_EXIT
54731 #define TARGET_MODE_EXIT ix86_mode_exit
54732
54733 #undef TARGET_MODE_PRIORITY
54734 #define TARGET_MODE_PRIORITY ix86_mode_priority
54735
54736 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
54737 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
54738
54739 #undef TARGET_LOAD_BOUNDS_FOR_ARG
54740 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
54741
54742 #undef TARGET_STORE_BOUNDS_FOR_ARG
54743 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
54744
54745 #undef TARGET_LOAD_RETURNED_BOUNDS
54746 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
54747
54748 #undef TARGET_STORE_RETURNED_BOUNDS
54749 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
54750
54751 #undef TARGET_CHKP_BOUND_MODE
54752 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
54753
54754 #undef TARGET_BUILTIN_CHKP_FUNCTION
54755 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
54756
54757 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
54758 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
54759
54760 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
54761 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
54762
54763 #undef TARGET_CHKP_INITIALIZE_BOUNDS
54764 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
54765
54766 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
54767 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
54768
54769 #undef TARGET_OFFLOAD_OPTIONS
54770 #define TARGET_OFFLOAD_OPTIONS \
54771 ix86_offload_options
54772
54773 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
54774 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
54775
54776 #undef TARGET_OPTAB_SUPPORTED_P
54777 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
54778
54779 struct gcc_target targetm = TARGET_INITIALIZER;
54780 \f
54781 #include "gt-i386.h"